added hasPtxFor and isCompatibleWith functions into gpu module, added docs for them
This commit is contained in:
parent
566befe908
commit
f3a2656808
@ -707,28 +707,42 @@ if (WITH_CUDA)
|
||||
message(STATUS "CUDA detected: " ${CUDA_VERSION})
|
||||
|
||||
set(CUDA_COMPUTE_CAPABILITIES " 1.1 1.2 1.3 2.0 " CACHE STRING "Add or remove compute capability")
|
||||
set(CUDA_NVCC_FLAGS_ARCH ${CUDA_COMPUTE_CAPABILITIES})
|
||||
set(CUDA_NVCC_FLAGS_ARCH ${CUDA_COMPUTE_CAPABILITIES})
|
||||
|
||||
# Detect which compute capabilities appear in CUDA_COMPUTE_CAPABILITIES.
# The value is expanded inside quotes so that string() always receives exactly
# one input argument: an empty value no longer drops the argument (which makes
# string(REGEX MATCH) fail), and a semicolon-separated value is no longer split
# into pieces that string() concatenates (e.g. "1.1;2.0" -> "1.12.0", which
# would falsely match 1.2).
# Each NVIDIA_CC_xy receives the matched text ("x.y") or an empty string.
string(REGEX MATCH "1\\.0" NVIDIA_CC_10 "${CUDA_COMPUTE_CAPABILITIES}")
string(REGEX MATCH "1\\.1" NVIDIA_CC_11 "${CUDA_COMPUTE_CAPABILITIES}")
string(REGEX MATCH "1\\.2" NVIDIA_CC_12 "${CUDA_COMPUTE_CAPABILITIES}")
string(REGEX MATCH "1\\.3" NVIDIA_CC_13 "${CUDA_COMPUTE_CAPABILITIES}")
string(REGEX MATCH "2\\.0" NVIDIA_CC_20 "${CUDA_COMPUTE_CAPABILITIES}")
string(REGEX MATCH "2\\.1" NVIDIA_CC_21 "${CUDA_COMPUTE_CAPABILITIES}")

# Turn each match into a boolean: HAVE_PTX_FOR_NVIDIA_CC_xy is true only when
# the corresponding capability was found above.
string(COMPARE EQUAL "1.0" "${NVIDIA_CC_10}" HAVE_PTX_FOR_NVIDIA_CC_10)
string(COMPARE EQUAL "1.1" "${NVIDIA_CC_11}" HAVE_PTX_FOR_NVIDIA_CC_11)
string(COMPARE EQUAL "1.2" "${NVIDIA_CC_12}" HAVE_PTX_FOR_NVIDIA_CC_12)
string(COMPARE EQUAL "1.3" "${NVIDIA_CC_13}" HAVE_PTX_FOR_NVIDIA_CC_13)
string(COMPARE EQUAL "2.0" "${NVIDIA_CC_20}" HAVE_PTX_FOR_NVIDIA_CC_20)
string(COMPARE EQUAL "2.1" "${NVIDIA_CC_21}" HAVE_PTX_FOR_NVIDIA_CC_21)
|
||||
|
||||
set(CUDA_NVCC_FLAGS_NUM "")
|
||||
set(CUDA_NVCC_FLAGS_NUM "")
|
||||
|
||||
while(NOT ${CUDA_NVCC_FLAGS_ARCH} STREQUAL "")
|
||||
string(REGEX MATCH "[0-9]+.[0-9]+" RESULT_NUM ${CUDA_NVCC_FLAGS_ARCH})
|
||||
string(REGEX MATCHALL "[0-9]" RESULT_STR ${RESULT_NUM})
|
||||
string(REGEX REPLACE ";" "\ " RESULT ${RESULT_STR})
|
||||
list(APPEND CUDA_NVCC_FLAGS_NUM ${RESULT})
|
||||
string(REGEX REPLACE "${RESULT_NUM}" "\ " CUDA_NVCC_FLAGS_ARCH_STR ${CUDA_NVCC_FLAGS_ARCH})
|
||||
string(STRIP ${CUDA_NVCC_FLAGS_ARCH_STR} CUDA_NVCC_FLAGS_ARCH)
|
||||
endwhile()
|
||||
while(NOT ${CUDA_NVCC_FLAGS_ARCH} STREQUAL "")
|
||||
string(REGEX MATCH "[0-9]+.[0-9]+" RESULT_NUM ${CUDA_NVCC_FLAGS_ARCH})
|
||||
string(REGEX MATCHALL "[0-9]" RESULT_STR ${RESULT_NUM})
|
||||
string(REGEX REPLACE ";" "\ " RESULT ${RESULT_STR})
|
||||
list(APPEND CUDA_NVCC_FLAGS_NUM ${RESULT})
|
||||
string(REGEX REPLACE "${RESULT_NUM}" "\ " CUDA_NVCC_FLAGS_ARCH_STR ${CUDA_NVCC_FLAGS_ARCH})
|
||||
string(STRIP ${CUDA_NVCC_FLAGS_ARCH_STR} CUDA_NVCC_FLAGS_ARCH)
|
||||
endwhile()
|
||||
|
||||
set (OpenCV_CUDA_CC "")
|
||||
set (loop_var "")
|
||||
foreach( loop_var IN LISTS CUDA_NVCC_FLAGS_NUM)
|
||||
set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -gencode arch=compute_${loop_var},code=sm_${loop_var})
|
||||
set (OpenCV_CUDA_CC ${OpenCV_CUDA_CC} -gencode arch=compute_${loop_var},code=sm_${loop_var})
|
||||
endforeach()
|
||||
set (OpenCV_CUDA_CC "")
|
||||
set (loop_var "")
|
||||
foreach( loop_var IN LISTS CUDA_NVCC_FLAGS_NUM)
|
||||
set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -gencode arch=compute_${loop_var},code=sm_${loop_var})
|
||||
set (OpenCV_CUDA_CC ${OpenCV_CUDA_CC} -gencode arch=compute_${loop_var},code=sm_${loop_var})
|
||||
endforeach()
|
||||
|
||||
### set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${OpenCV_COMPUTE_CAPABILITIES})
|
||||
endif()
|
||||
### set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${OpenCV_COMPUTE_CAPABILITIES})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
||||
|
@ -163,6 +163,24 @@
|
||||
/* NVidia Cuda Runtime API*/
|
||||
#cmakedefine HAVE_CUDA
|
||||
|
||||
/* The project was generated with 1.0 NVIDIA device arch support */
|
||||
#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_10
|
||||
|
||||
/* The project was generated with 1.1 NVIDIA device arch support */
|
||||
#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_11
|
||||
|
||||
/* The project was generated with 1.2 NVIDIA device arch support */
|
||||
#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_12
|
||||
|
||||
/* The project was generated with 1.3 NVIDIA device arch support */
|
||||
#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_13
|
||||
|
||||
/* The project was generated with 2.0 NVIDIA device arch support */
|
||||
#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_20
|
||||
|
||||
/* The project was generated with 2.1 NVIDIA device arch support */
|
||||
#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_21
|
||||
|
||||
/* VideoInput library */
|
||||
#cmakedefine HAVE_VIDEOINPUT
|
||||
|
||||
|
@ -52,7 +52,7 @@ Returns free and total memory for the current device.
|
||||
|
||||
|
||||
\cvCppFunc{gpu::hasNativeDoubleSupport}
|
||||
Returns true if the specified GPU has native double support, false otherwise.
|
||||
Returns true, if the specified GPU has native double support, otherwise false.
|
||||
|
||||
\cvdefCpp{bool hasNativeDoubleSupport(int device);}
|
||||
\begin{description}
|
||||
@ -61,9 +61,31 @@ Returns true if the specified GPU has native double support, false otherwise.
|
||||
|
||||
|
||||
\cvCppFunc{gpu::hasAtomicsSupport}
|
||||
Returns true if the specified GPU has atomics support, false otherwise.
|
||||
Returns true, if the specified GPU has atomics support, otherwise false.
|
||||
|
||||
\cvdefCpp{bool hasAtomicsSupport(int device);}
|
||||
\begin{description}
|
||||
\cvarg{device}{GPU index. Can be obtained via \cvCppCross{gpu::getDevice}.}
|
||||
\end{description}
|
||||
\end{description}
|
||||
|
||||
|
||||
\cvCppFunc{gpu::hasPtxFor}
|
||||
Returns true if the GPU module was built with PTX support for the given compute capability, false otherwise.
|
||||
|
||||
\cvdefCpp{bool hasPtxFor(int major, int minor);}
|
||||
\begin{description}
|
||||
\cvarg{major}{Major compute capability (CC) version.}
|
||||
\cvarg{minor}{Minor compute capability (CC) version.}
|
||||
\end{description}
|
||||
|
||||
|
||||
\cvCppFunc{gpu::isCompatibleWith}
|
||||
Returns true if the GPU module is PTX-compatible with the given NVIDIA GPU device, false otherwise.
|
||||
|
||||
\cvdefCpp{bool isCompatibleWith(int device);}
|
||||
\begin{description}
|
||||
\cvarg{device}{GPU index. Can be obtained via \cvCppCross{gpu::getDevice}.}
|
||||
\end{description}
|
||||
|
||||
According to the CUDA C Programming Guide Version 3.2: "PTX code produced for some specific compute capability can always be compiled to binary code of greater or equal compute capability".
|
||||
|
||||
|
@ -72,6 +72,12 @@ namespace cv
|
||||
CV_EXPORTS bool hasNativeDoubleSupport(int device);
|
||||
CV_EXPORTS bool hasAtomicsSupport(int device);
|
||||
|
||||
//! Checks if the GPU module was built with PTX support (-arch) of the given CC
|
||||
CV_EXPORTS bool hasPtxFor(int major, int minor);
|
||||
|
||||
//! Checks if the GPU module is PTX compatible with the given NVIDIA device
|
||||
CV_EXPORTS bool isCompatibleWith(int device);
|
||||
|
||||
//////////////////////////////// Error handling ////////////////////////
|
||||
|
||||
CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func);
|
||||
|
@ -122,5 +122,57 @@ CV_EXPORTS bool cv::gpu::hasAtomicsSupport(int device)
|
||||
return major > 1 || (major == 1 && minor >= 1);
|
||||
}
|
||||
|
||||
// Reports whether the build configuration enabled the given compute
// capability (major.minor). Each HAVE_PTX_FOR_NVIDIA_CC_xy macro that is
// defined at compile time contributes one accepted (major, minor) pair; any
// other pair yields false.
CV_EXPORTS bool cv::gpu::hasPtxFor(int major, int minor)
{
    bool builtWithPtx = false;

#ifdef HAVE_PTX_FOR_NVIDIA_CC_10
    builtWithPtx = builtWithPtx || (major == 1 && minor == 0);
#endif

#ifdef HAVE_PTX_FOR_NVIDIA_CC_11
    builtWithPtx = builtWithPtx || (major == 1 && minor == 1);
#endif

#ifdef HAVE_PTX_FOR_NVIDIA_CC_12
    builtWithPtx = builtWithPtx || (major == 1 && minor == 2);
#endif

#ifdef HAVE_PTX_FOR_NVIDIA_CC_13
    builtWithPtx = builtWithPtx || (major == 1 && minor == 3);
#endif

#ifdef HAVE_PTX_FOR_NVIDIA_CC_20
    builtWithPtx = builtWithPtx || (major == 2 && minor == 0);
#endif

#ifdef HAVE_PTX_FOR_NVIDIA_CC_21
    builtWithPtx = builtWithPtx || (major == 2 && minor == 1);
#endif

    return builtWithPtx;
}
|
||||
|
||||
|
||||
// Checks whether the GPU module can run on the given device: returns true if
// the module was built for any compute capability less than or equal to the
// device's own, false otherwise.
//
// The definition is qualified with cv::gpu:: so that it defines the function
// declared in the cv::gpu namespace; an unqualified definition would create
// an unrelated global function and leave the declared one undefined.
//
// NOTE(review): the result is only as reliable as hasPtxFor(); confirm that
// the build really embeds PTX (not just device binaries) for every compute
// capability that hasPtxFor() reports.
CV_EXPORTS bool cv::gpu::isCompatibleWith(int device)
{
    // According to the CUDA C Programming Guide Version 3.2: "PTX code
    // produced for some specific compute capability can always be compiled to
    // binary code of greater or equal compute capability".

    int major, minor;
    getComputeCapability(device, major, minor);

    // Walk downwards from the device's capability to 1.0.
    for (; major >= 1; --major)
    {
        for (; minor >= 0; --minor)
        {
            if (hasPtxFor(major, minor))
                return true;
        }

        // Every lower major version is scanned from minor 9 downwards.
        minor = 9;
    }

    return false;
}
|
||||
|
||||
#endif
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user