added hasPtxFor and isCompatibleWith functions into gpu module, added docs for them

This commit is contained in:
Alexey Spizhevoy 2011-01-18 12:01:28 +00:00
parent 566befe908
commit f3a2656808
5 changed files with 133 additions and 21 deletions

View File

@ -707,28 +707,42 @@ if (WITH_CUDA)
message(STATUS "CUDA detected: " ${CUDA_VERSION})
set(CUDA_COMPUTE_CAPABILITIES " 1.1 1.2 1.3 2.0 " CACHE STRING "Add or remove compute capability")
set(CUDA_NVCC_FLAGS_ARCH ${CUDA_COMPUTE_CAPABILITIES})
set(CUDA_NVCC_FLAGS_ARCH ${CUDA_COMPUTE_CAPABILITIES})
string(REGEX MATCH "1\\.0" NVIDIA_CC_10 ${CUDA_COMPUTE_CAPABILITIES})
string(REGEX MATCH "1\\.1" NVIDIA_CC_11 ${CUDA_COMPUTE_CAPABILITIES})
string(REGEX MATCH "1\\.2" NVIDIA_CC_12 ${CUDA_COMPUTE_CAPABILITIES})
string(REGEX MATCH "1\\.3" NVIDIA_CC_13 ${CUDA_COMPUTE_CAPABILITIES})
string(REGEX MATCH "2\\.0" NVIDIA_CC_20 ${CUDA_COMPUTE_CAPABILITIES})
string(REGEX MATCH "2\\.1" NVIDIA_CC_21 ${CUDA_COMPUTE_CAPABILITIES})
string(COMPARE EQUAL "1.0" "${NVIDIA_CC_10}" HAVE_PTX_FOR_NVIDIA_CC_10)
string(COMPARE EQUAL "1.1" "${NVIDIA_CC_11}" HAVE_PTX_FOR_NVIDIA_CC_11)
string(COMPARE EQUAL "1.2" "${NVIDIA_CC_12}" HAVE_PTX_FOR_NVIDIA_CC_12)
string(COMPARE EQUAL "1.3" "${NVIDIA_CC_13}" HAVE_PTX_FOR_NVIDIA_CC_13)
string(COMPARE EQUAL "2.0" "${NVIDIA_CC_20}" HAVE_PTX_FOR_NVIDIA_CC_20)
string(COMPARE EQUAL "2.1" "${NVIDIA_CC_21}" HAVE_PTX_FOR_NVIDIA_CC_21)
set(CUDA_NVCC_FLAGS_NUM "")
set(CUDA_NVCC_FLAGS_NUM "")
while(NOT ${CUDA_NVCC_FLAGS_ARCH} STREQUAL "")
string(REGEX MATCH "[0-9]+.[0-9]+" RESULT_NUM ${CUDA_NVCC_FLAGS_ARCH})
string(REGEX MATCHALL "[0-9]" RESULT_STR ${RESULT_NUM})
string(REGEX REPLACE ";" "\ " RESULT ${RESULT_STR})
list(APPEND CUDA_NVCC_FLAGS_NUM ${RESULT})
string(REGEX REPLACE "${RESULT_NUM}" "\ " CUDA_NVCC_FLAGS_ARCH_STR ${CUDA_NVCC_FLAGS_ARCH})
string(STRIP ${CUDA_NVCC_FLAGS_ARCH_STR} CUDA_NVCC_FLAGS_ARCH)
endwhile()
while(NOT ${CUDA_NVCC_FLAGS_ARCH} STREQUAL "")
string(REGEX MATCH "[0-9]+.[0-9]+" RESULT_NUM ${CUDA_NVCC_FLAGS_ARCH})
string(REGEX MATCHALL "[0-9]" RESULT_STR ${RESULT_NUM})
string(REGEX REPLACE ";" "\ " RESULT ${RESULT_STR})
list(APPEND CUDA_NVCC_FLAGS_NUM ${RESULT})
string(REGEX REPLACE "${RESULT_NUM}" "\ " CUDA_NVCC_FLAGS_ARCH_STR ${CUDA_NVCC_FLAGS_ARCH})
string(STRIP ${CUDA_NVCC_FLAGS_ARCH_STR} CUDA_NVCC_FLAGS_ARCH)
endwhile()
set (OpenCV_CUDA_CC "")
set (loop_var "")
foreach( loop_var IN LISTS CUDA_NVCC_FLAGS_NUM)
set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -gencode arch=compute_${loop_var},code=sm_${loop_var})
set (OpenCV_CUDA_CC ${OpenCV_CUDA_CC} -gencode arch=compute_${loop_var},code=sm_${loop_var})
endforeach()
set (OpenCV_CUDA_CC "")
set (loop_var "")
foreach( loop_var IN LISTS CUDA_NVCC_FLAGS_NUM)
set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -gencode arch=compute_${loop_var},code=sm_${loop_var})
set (OpenCV_CUDA_CC ${OpenCV_CUDA_CC} -gencode arch=compute_${loop_var},code=sm_${loop_var})
endforeach()
### set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${OpenCV_COMPUTE_CAPABILITIES})
endif()
### set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${OpenCV_COMPUTE_CAPABILITIES})
endif()
endif()

View File

@ -163,6 +163,24 @@
/* NVidia Cuda Runtime API*/
#cmakedefine HAVE_CUDA
/* The project was generated with 1.0 NVIDIA device arch support */
#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_10
/* The project was generated with 1.1 NVIDIA device arch support */
#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_11
/* The project was generated with 1.2 NVIDIA device arch support */
#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_12
/* The project was generated with 1.3 NVIDIA device arch support */
#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_13
/* The project was generated with 2.0 NVIDIA device arch support */
#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_20
/* The project was generated with 2.1 NVIDIA device arch support */
#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_21
/* VideoInput library */
#cmakedefine HAVE_VIDEOINPUT

View File

@ -52,7 +52,7 @@ Returns free and total memory for the current device.
\cvCppFunc{gpu::hasNativeDoubleSupport}
Returns true if the specified GPU has native double support, false otherwise.
Returns true, if the specified GPU has native double support, otherwise false.
\cvdefCpp{bool hasNativeDoubleSupport(int device);}
\begin{description}
@ -61,9 +61,31 @@ Returns true if the specified GPU has native double support, false otherwise.
\cvCppFunc{gpu::hasAtomicsSupport}
Returns true if the specified GPU has atomics support, false otherwise.
Returns true, if the specified GPU has atomics support, otherwise false.
\cvdefCpp{bool hasAtomicsSupport(int device);}
\begin{description}
\cvarg{device}{GPU index. Can be obtained via \cvCppCross{gpu::getDevice}.}
\end{description}
\end{description}
\cvCppFunc{gpu::hasPtxFor}
Returns true if the GPU module was built with PTX support for the given compute capability (CC), and false otherwise.
\cvdefCpp{bool hasPtxFor(int major, int minor);}
\begin{description}
\cvarg{major}{Major version of the compute capability (CC).}
\cvarg{minor}{Minor version of the compute capability (CC).}
\end{description}
\cvCppFunc{gpu::isCompatibleWith}
Returns true if the GPU module is PTX-compatible with the given NVIDIA GPU device, and false otherwise.
\cvdefCpp{bool isCompatibleWith(int device);}
\begin{description}
\cvarg{device}{GPU index. Can be obtained via \cvCppCross{gpu::getDevice}.}
\end{description}
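According to the CUDA C Programming Guide Version 3.2: ``PTX code produced for some specific compute capability can always be compiled to binary code of greater or equal compute capability''. The function therefore returns true when the GPU module was built with PTX for at least one compute capability that is less than or equal to the compute capability of the given device.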

View File

@ -72,6 +72,12 @@ namespace cv
CV_EXPORTS bool hasNativeDoubleSupport(int device);
CV_EXPORTS bool hasAtomicsSupport(int device);
//! Checks if the GPU module was built with PTX support (-arch) of the given CC.
//! The answer is fixed when the library is built: it is true only if the
//! matching HAVE_PTX_FOR_NVIDIA_CC_<major><minor> macro was defined by the
//! build configuration. Only CC 1.0, 1.1, 1.2, 1.3, 2.0 and 2.1 can be reported.
//! @param major major version of the compute capability (CC) to query
//! @param minor minor version of the compute capability (CC) to query
//! @return true if the module was built for exactly this CC, false otherwise
CV_EXPORTS bool hasPtxFor(int major, int minor);
//! Checks if the GPU module is PTX compatible with the given NVIDIA device.
//! The module is considered compatible when hasPtxFor() reports true for at
//! least one compute capability (CC) that is less than or equal to the CC of
//! the device (searched downwards, major versions >= 1 only).
//! @param device GPU index
//! @return true if a usable CC was found, false otherwise
CV_EXPORTS bool isCompatibleWith(int device);
//////////////////////////////// Error handling ////////////////////////
CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func);

View File

@ -122,5 +122,57 @@ CV_EXPORTS bool cv::gpu::hasAtomicsSupport(int device)
return major > 1 || (major == 1 && minor >= 1);
}
// Reports whether the GPU module was built for the compute capability (CC)
// major.minor.
//
// The set of supported CCs is decided at build time: the build configuration
// defines one HAVE_PTX_FOR_NVIDIA_CC_<major><minor> macro per CC the project
// was generated for. Only 1.0, 1.1, 1.2, 1.3, 2.0 and 2.1 are known here;
// every other (major, minor) pair yields false.
//
// major - major CC version to look up
// minor - minor CC version to look up
// Returns true if the exact CC was enabled at build time, false otherwise.
CV_EXPORTS bool cv::gpu::hasPtxFor(int major, int minor)
{
    // One {major, minor} row per CC enabled at build time. The last row is a
    // sentinel whose only job is to keep the array non-empty when no macro is
    // defined; it is excluded from the search below.
    static const int builtFor[][2] =
    {
#ifdef HAVE_PTX_FOR_NVIDIA_CC_10
        {1, 0},
#endif
#ifdef HAVE_PTX_FOR_NVIDIA_CC_11
        {1, 1},
#endif
#ifdef HAVE_PTX_FOR_NVIDIA_CC_12
        {1, 2},
#endif
#ifdef HAVE_PTX_FOR_NVIDIA_CC_13
        {1, 3},
#endif
#ifdef HAVE_PTX_FOR_NVIDIA_CC_20
        {2, 0},
#endif
#ifdef HAVE_PTX_FOR_NVIDIA_CC_21
        {2, 1},
#endif
        {-1, -1}
    };

    // Number of real rows, i.e. everything except the trailing sentinel.
    const int realRows = static_cast<int>(sizeof(builtFor) / sizeof(builtFor[0])) - 1;

    for (int row = 0; row < realRows; ++row)
    {
        if (builtFor[row][0] == major && builtFor[row][1] == minor)
            return true;
    }

    return false;
}
// Reports whether the GPU module can be used with the given device.
//
// According to the CUDA C Programming Guide Version 3.2: "PTX code
// produced for some specific compute capability can always be compiled to
// binary code of greater or equal compute capability". The module is
// therefore compatible if it was built for at least one compute capability
// (CC) that does not exceed the CC of the device.
//
// The definition is qualified with cv::gpu:: like the other functions in this
// file. Without the qualifier it would define an unrelated global function and
// leave the cv::gpu::isCompatibleWith declared in the header undefined.
//
// device - GPU index whose compute capability is queried
// Returns true if hasPtxFor() succeeds for some CC <= the device's CC
// (major versions >= 1 only), false otherwise.
CV_EXPORTS bool cv::gpu::isCompatibleWith(int device)
{
    int deviceMajor, deviceMinor;
    getComputeCapability(device, deviceMajor, deviceMinor);

    // Walk downwards from the device's own CC.
    for (int major = deviceMajor; major >= 1; --major)
    {
        // For the device's own major version start at its minor version; for
        // every lower major version any minor version (9..0) is acceptable.
        const int highestMinor = (major == deviceMajor) ? deviceMinor : 9;

        for (int minor = highestMinor; minor >= 0; --minor)
        {
            if (hasPtxFor(major, minor))
                return true;
        }
    }

    return false;
}
#endif