added hasPtxFor and isCompatibleWith functions into gpu module, added docs for them

2011-01-18 12:01:28 +00:00 · 2011-01-18 12:01:28 +00:00 · f3a2656808
commit f3a2656808
parent 566befe908
5 changed files with 133 additions and 21 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -707,28 +707,42 @@ if (WITH_CUDA)
        message(STATUS "CUDA detected: " ${CUDA_VERSION})

 		set(CUDA_COMPUTE_CAPABILITIES " 1.1 1.2 1.3 2.0 " CACHE STRING "Add or remove compute capability")
-    set(CUDA_NVCC_FLAGS_ARCH ${CUDA_COMPUTE_CAPABILITIES})
+        set(CUDA_NVCC_FLAGS_ARCH "${CUDA_COMPUTE_CAPABILITIES}")
+        # Look up each known compute capability in the requested list; inputs are quoted so an empty list is legal.
+        string(REGEX MATCH "1\\.0" NVIDIA_CC_10 "${CUDA_COMPUTE_CAPABILITIES}")
+        string(REGEX MATCH "1\\.1" NVIDIA_CC_11 "${CUDA_COMPUTE_CAPABILITIES}")
+        string(REGEX MATCH "1\\.2" NVIDIA_CC_12 "${CUDA_COMPUTE_CAPABILITIES}")
+        string(REGEX MATCH "1\\.3" NVIDIA_CC_13 "${CUDA_COMPUTE_CAPABILITIES}")
+        string(REGEX MATCH "2\\.0" NVIDIA_CC_20 "${CUDA_COMPUTE_CAPABILITIES}")
+        string(REGEX MATCH "2\\.1" NVIDIA_CC_21 "${CUDA_COMPUTE_CAPABILITIES}")
+        # A miss leaves NVIDIA_CC_xy empty, so each HAVE_PTX_FOR_NVIDIA_CC_xy below is 1 only for a listed capability.
+        string(COMPARE EQUAL "1.0" "${NVIDIA_CC_10}" HAVE_PTX_FOR_NVIDIA_CC_10)
+        string(COMPARE EQUAL "1.1" "${NVIDIA_CC_11}" HAVE_PTX_FOR_NVIDIA_CC_11)
+        string(COMPARE EQUAL "1.2" "${NVIDIA_CC_12}" HAVE_PTX_FOR_NVIDIA_CC_12)
+        string(COMPARE EQUAL "1.3" "${NVIDIA_CC_13}" HAVE_PTX_FOR_NVIDIA_CC_13)
+        string(COMPARE EQUAL "2.0" "${NVIDIA_CC_20}" HAVE_PTX_FOR_NVIDIA_CC_20)
+        string(COMPARE EQUAL "2.1" "${NVIDIA_CC_21}" HAVE_PTX_FOR_NVIDIA_CC_21)

-    set(CUDA_NVCC_FLAGS_NUM "")
+        set(CUDA_NVCC_FLAGS_NUM "")  # receives the digits of each requested compute capability

-    while(NOT ${CUDA_NVCC_FLAGS_ARCH} STREQUAL "")
-        string(REGEX MATCH "[0-9]+.[0-9]+" RESULT_NUM ${CUDA_NVCC_FLAGS_ARCH})
-        string(REGEX MATCHALL "[0-9]" RESULT_STR ${RESULT_NUM})
-        string(REGEX REPLACE ";" "\ " RESULT ${RESULT_STR})
-        list(APPEND CUDA_NVCC_FLAGS_NUM ${RESULT})
-        string(REGEX REPLACE "${RESULT_NUM}" "\ " CUDA_NVCC_FLAGS_ARCH_STR ${CUDA_NVCC_FLAGS_ARCH})
-        string(STRIP ${CUDA_NVCC_FLAGS_ARCH_STR} CUDA_NVCC_FLAGS_ARCH)
-    endwhile()
+        # Turn every "major.minor" entry into its digits ("1.3" -> "13"); tokens of any other shape are ignored.
+        string(REGEX MATCHALL "[0-9]+\\.[0-9]+" cuda_cc_versions "${CUDA_NVCC_FLAGS_ARCH}")
+        foreach(cuda_cc_version IN LISTS cuda_cc_versions)
+            string(REPLACE "." "" cuda_cc_digits "${cuda_cc_version}")
+            list(APPEND CUDA_NVCC_FLAGS_NUM "${cuda_cc_digits}")
+        endforeach()
+        list(REMOVE_DUPLICATES CUDA_NVCC_FLAGS_NUM)  # a capability listed twice yields a single entry
+        set(CUDA_NVCC_FLAGS_ARCH "")  # work variable ends up empty once every entry has been consumed

-    set (OpenCV_CUDA_CC "")
-    set (loop_var "")
-    foreach( loop_var IN LISTS CUDA_NVCC_FLAGS_NUM)
-        set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -gencode arch=compute_${loop_var},code=sm_${loop_var})
-        set (OpenCV_CUDA_CC  ${OpenCV_CUDA_CC}  -gencode arch=compute_${loop_var},code=sm_${loop_var})
-    endforeach()
+        set (OpenCV_CUDA_CC "")
+        foreach(loop_var IN LISTS CUDA_NVCC_FLAGS_NUM)  # one "-gencode" pair per requested capability
+            string(REGEX REPLACE "^21$" "20" virtual_cc "${loop_var}")  # nvcc has no compute_21; sm_21 is built from compute_20
+            list(APPEND OpenCV_CUDA_CC -gencode arch=compute_${virtual_cc},code=sm_${loop_var})
+        endforeach()
+        list(APPEND CUDA_NVCC_FLAGS ${OpenCV_CUDA_CC})  # nvcc receives the same pairs, in the same order

-    ### set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${OpenCV_COMPUTE_CAPABILITIES})
-  endif()
+        ### set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${OpenCV_COMPUTE_CAPABILITIES})
+    endif()
 endif()


--- a/cvconfig.h.cmake
+++ b/cvconfig.h.cmake
@ -163,6 +163,24 @@
 /* NVidia Cuda Runtime API*/
 #cmakedefine HAVE_CUDA

+/* Defined when "1.0" is listed in CUDA_COMPUTE_CAPABILITIES (NVIDIA compute capability 1.0) */
+#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_10
+
+/* Defined when "1.1" is listed in CUDA_COMPUTE_CAPABILITIES (NVIDIA compute capability 1.1) */
+#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_11
+
+/* Defined when "1.2" is listed in CUDA_COMPUTE_CAPABILITIES (NVIDIA compute capability 1.2) */
+#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_12
+
+/* Defined when "1.3" is listed in CUDA_COMPUTE_CAPABILITIES (NVIDIA compute capability 1.3) */
+#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_13
+
+/* Defined when "2.0" is listed in CUDA_COMPUTE_CAPABILITIES (NVIDIA compute capability 2.0) */
+#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_20
+
+/* Defined when "2.1" is listed in CUDA_COMPUTE_CAPABILITIES (NVIDIA compute capability 2.1) */
+#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_21
+
 /* VideoInput library */
 #cmakedefine HAVE_VIDEOINPUT

--- a/doc/gpu_initialization.tex
+++ b/doc/gpu_initialization.tex
@ -52,7 +52,7 @@ Returns free and total memory for the current device.


 \cvCppFunc{gpu::hasNativeDoubleSupport}
-Returns true if the specified GPU has native double support, false otherwise.
+Returns true if the specified GPU has native double support, and false otherwise.

 \cvdefCpp{bool hasNativeDoubleSupport(int device);}
 \begin{description}
@ -61,9 +61,31 @@ Returns true if the specified GPU has native double support, false otherwise.


 \cvCppFunc{gpu::hasAtomicsSupport}
-Returns true if the specified GPU has atomics support, false otherwise.
+Returns true if the specified GPU has atomics support, and false otherwise.

 \cvdefCpp{bool hasAtomicsSupport(int device);}
 \begin{description}
 \cvarg{device}{GPU index. Can be obtained via \cvCppCross{gpu::getDevice}.}
-\end{description} 
+\end{description} 
+
+
+\cvCppFunc{gpu::hasPtxFor}
+Returns true if the GPU module was built with PTX support for the given compute capability, and false otherwise.
+
+\cvdefCpp{bool hasPtxFor(int major, int minor);}
+\begin{description}
+\cvarg{major}{Major version of the compute capability (CC).}
+\cvarg{minor}{Minor version of the compute capability (CC).}
+\end{description}
+
+
+\cvCppFunc{gpu::isCompatibleWith}
+Returns true if the GPU module is PTX compatible with the given NVIDIA GPU device, and false otherwise.
+
+\cvdefCpp{bool isCompatibleWith(int device);}
+\begin{description}
+\cvarg{device}{GPU index. Can be obtained via \cvCppCross{gpu::getDevice}.}
+\end{description}
+
+According to the CUDA C Programming Guide Version 3.2: ``PTX code produced for some specific compute capability can always be compiled to binary code of greater or equal compute capability''.
+
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@ -72,6 +72,12 @@ namespace cv
        CV_EXPORTS bool hasNativeDoubleSupport(int device);
        CV_EXPORTS bool hasAtomicsSupport(int device);

+        //! Returns true if the GPU module was built with PTX support (-arch) for compute capability major.minor
+        CV_EXPORTS bool hasPtxFor(int major, int minor);
+
+        //! Returns true if the GPU module is PTX compatible with the NVIDIA device that has the given GPU index
+        CV_EXPORTS bool isCompatibleWith(int device);
+
        //////////////////////////////// Error handling ////////////////////////

        CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func);
--- a/modules/gpu/src/initialization.cpp
+++ b/modules/gpu/src/initialization.cpp
@ -122,5 +122,57 @@ CV_EXPORTS bool cv::gpu::hasAtomicsSupport(int device)
    return major > 1 || (major == 1 && minor >= 1);
 }

+CV_EXPORTS bool cv::gpu::hasPtxFor(int major, int minor)
+{
+    // Answers whether the (major, minor) compute capability was enabled at
+    // build time. Every HAVE_PTX_FOR_NVIDIA_CC_xy macro that is defined adds
+    // one exact match; any other pair yields false.
+    bool enabled = false;
+
+#ifdef HAVE_PTX_FOR_NVIDIA_CC_10
+    enabled = enabled || (major == 1 && minor == 0);
+#endif
+#ifdef HAVE_PTX_FOR_NVIDIA_CC_11
+    enabled = enabled || (major == 1 && minor == 1);
+#endif
+#ifdef HAVE_PTX_FOR_NVIDIA_CC_12
+    enabled = enabled || (major == 1 && minor == 2);
+#endif
+#ifdef HAVE_PTX_FOR_NVIDIA_CC_13
+    enabled = enabled || (major == 1 && minor == 3);
+#endif
+#ifdef HAVE_PTX_FOR_NVIDIA_CC_20
+    enabled = enabled || (major == 2 && minor == 0);
+#endif
+#ifdef HAVE_PTX_FOR_NVIDIA_CC_21
+    enabled = enabled || (major == 2 && minor == 1);
+#endif
+
+    return enabled;
+}
+
+
+CV_EXPORTS bool cv::gpu::isCompatibleWith(int device)
+{
+    // Returns true if hasPtxFor() accepts the device's compute capability or
+    // any lower one. Per the CUDA C Programming Guide Version 3.2, "PTX code
+    // produced for some specific compute capability can always be compiled to
+    // binary code of greater or equal compute capability".
+    int major, minor;
+    getComputeCapability(device, major, minor);
+    // Scan downwards, starting at the capability reported for the device.
+    for (; major >= 1; --major)
+    {
+        for (; minor >= 0; --minor)
+        {
+            if (hasPtxFor(major, minor))
+                return true;
+        }
+        minor = 9; // each lower major version is scanned from x.9 down to x.0
+    }
+
+    return false;
+}
+
 #endif