Updated the GPU module section of the main CMakeLists.txt: the user can now choose which binary (CUBIN) and intermediate (PTX) code versions are built into the GPU module image.
Added more functions for checking at runtime which GPU code versions the module was built with.
This commit is contained in:
parent
1e1a139270
commit
8779306800
@ -700,48 +700,55 @@ endif()
|
||||
|
||||
############################### CUDA ################################
|
||||
|
||||
if (WITH_CUDA)
|
||||
if(WITH_CUDA)
|
||||
find_package(CUDA 3.2)
|
||||
if (CUDA_FOUND)
|
||||
|
||||
if(CUDA_FOUND)
|
||||
set(HAVE_CUDA 1)
|
||||
message(STATUS "CUDA detected: " ${CUDA_VERSION})
|
||||
|
||||
set(CUDA_COMPUTE_CAPABILITIES " 1.1 1.2 1.3 2.0 " CACHE STRING "Add or remove compute capability")
|
||||
set(CUDA_NVCC_FLAGS_ARCH ${CUDA_COMPUTE_CAPABILITIES})
|
||||
set(CUDA_ARCH_GPU "1.1 1.2 1.3 2.0" CACHE STRING "Specify 'real' GPU architectures to build binaries for")
|
||||
set(CUDA_ARCH_PTX "1.1 1.3 2.0" CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for")
|
||||
|
||||
string(REGEX MATCH "1\\.0" STR_OPENCV_GPU_CUDA_ARCH_10 ${CUDA_COMPUTE_CAPABILITIES})
|
||||
string(REGEX MATCH "1\\.1" STR_OPENCV_GPU_CUDA_ARCH_11 ${CUDA_COMPUTE_CAPABILITIES})
|
||||
string(REGEX MATCH "1\\.2" STR_OPENCV_GPU_CUDA_ARCH_12 ${CUDA_COMPUTE_CAPABILITIES})
|
||||
string(REGEX MATCH "1\\.3" STR_OPENCV_GPU_CUDA_ARCH_13 ${CUDA_COMPUTE_CAPABILITIES})
|
||||
string(REGEX MATCH "2\\.0" STR_OPENCV_GPU_CUDA_ARCH_20 ${CUDA_COMPUTE_CAPABILITIES})
|
||||
string(REGEX MATCH "2\\.1" STR_OPENCV_GPU_CUDA_ARCH_21 ${CUDA_COMPUTE_CAPABILITIES})
|
||||
# Architectures to be searched for in user's input
|
||||
set (CUDA_ARCH_ALL 1.0 1.1 1.2 1.3 2.0 2.1)
|
||||
|
||||
string(COMPARE EQUAL "1.0" "${STR_OPENCV_GPU_CUDA_ARCH_10}" OPENCV_GPU_CUDA_ARCH_10)
|
||||
string(COMPARE EQUAL "1.1" "${STR_OPENCV_GPU_CUDA_ARCH_11}" OPENCV_GPU_CUDA_ARCH_11)
|
||||
string(COMPARE EQUAL "1.2" "${STR_OPENCV_GPU_CUDA_ARCH_12}" OPENCV_GPU_CUDA_ARCH_12)
|
||||
string(COMPARE EQUAL "1.3" "${STR_OPENCV_GPU_CUDA_ARCH_13}" OPENCV_GPU_CUDA_ARCH_13)
|
||||
string(COMPARE EQUAL "2.0" "${STR_OPENCV_GPU_CUDA_ARCH_20}" OPENCV_GPU_CUDA_ARCH_20)
|
||||
string(COMPARE EQUAL "2.1" "${STR_OPENCV_GPU_CUDA_ARCH_21}" OPENCV_GPU_CUDA_ARCH_21)
|
||||
|
||||
set(CUDA_NVCC_FLAGS_NUM "")
|
||||
|
||||
while(NOT ${CUDA_NVCC_FLAGS_ARCH} STREQUAL "")
|
||||
string(REGEX MATCH "[0-9]+.[0-9]+" RESULT_NUM ${CUDA_NVCC_FLAGS_ARCH})
|
||||
string(REGEX MATCHALL "[0-9]" RESULT_STR ${RESULT_NUM})
|
||||
string(REGEX REPLACE ";" "\ " RESULT ${RESULT_STR})
|
||||
list(APPEND CUDA_NVCC_FLAGS_NUM ${RESULT})
|
||||
string(REGEX REPLACE "${RESULT_NUM}" "\ " CUDA_NVCC_FLAGS_ARCH_STR ${CUDA_NVCC_FLAGS_ARCH})
|
||||
string(STRIP ${CUDA_NVCC_FLAGS_ARCH_STR} CUDA_NVCC_FLAGS_ARCH)
|
||||
endwhile()
|
||||
|
||||
set (OpenCV_CUDA_CC "")
|
||||
set (loop_var "")
|
||||
foreach( loop_var IN LISTS CUDA_NVCC_FLAGS_NUM)
|
||||
set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -gencode arch=compute_${loop_var},code=sm_${loop_var})
|
||||
set (OpenCV_CUDA_CC ${OpenCV_CUDA_CC} -gencode arch=compute_${loop_var},code=sm_${loop_var})
|
||||
# Parse user's input
|
||||
foreach(ARCH IN LISTS CUDA_ARCH_ALL)
|
||||
string(REGEX MATCH ${ARCH} ARCH_GPU_MATCH "${CUDA_ARCH_GPU}")
|
||||
string(REGEX MATCH ${ARCH} ARCH_PTX_MATCH "${CUDA_ARCH_PTX}")
|
||||
string(REGEX REPLACE "\\." "" ARCH_GPU_AS_NUM "${ARCH_GPU_MATCH}")
|
||||
string(REGEX REPLACE "\\." "" ARCH_PTX_AS_NUM "${ARCH_PTX_MATCH}")
|
||||
|
||||
# Define variables indicating the architectures specified by user
|
||||
if(NOT ${ARCH_GPU_AS_NUM} STREQUAL "")
|
||||
set(OPENCV_ARCH_GPU_${ARCH_GPU_AS_NUM} 1)
|
||||
endif()
|
||||
if(NOT ${ARCH_PTX_AS_NUM} STREQUAL "")
|
||||
set(OPENCV_ARCH_PTX_${ARCH_PTX_AS_NUM} 1)
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
set(NVCC_FLAGS_EXTRA "")
|
||||
|
||||
# Tell nvcc to add binaries for the specified GPUs
|
||||
string(REGEX REPLACE "\\." "" CUDA_ARCH_GPU "${CUDA_ARCH_GPU}")
|
||||
string(REGEX MATCHALL "[0-9]+" CUDA_ARCH_GPU_LIST "${CUDA_ARCH_GPU}")
|
||||
foreach(ARCH_GPU IN LISTS CUDA_ARCH_GPU_LIST)
|
||||
set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${ARCH_GPU},code=sm_${ARCH_GPU})
|
||||
endforeach()
|
||||
|
||||
### set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${OpenCV_COMPUTE_CAPABILITIES})
|
||||
|
||||
# Tell nvcc to add PTX intermediate code for the specified architectures
|
||||
string(REGEX REPLACE "\\." "" CUDA_ARCH_PTX "${CUDA_ARCH_PTX}")
|
||||
string(REGEX MATCHALL "[0-9]+" CUDA_ARCH_PTX_LIST "${CUDA_ARCH_PTX}")
|
||||
foreach(ARCH_PTX IN LISTS CUDA_ARCH_PTX_LIST)
|
||||
set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${ARCH_PTX},code=compute_${ARCH_PTX})
|
||||
endforeach()
|
||||
|
||||
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${NVCC_FLAGS_EXTRA})
|
||||
message(STATUS "CUDA NVCC flags: ${CUDA_NVCC_FLAGS}")
|
||||
|
||||
set(OpenCV_CUDA_CC "${NVCC_FLAGS_EXTRA}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
@ -163,23 +163,21 @@
|
||||
/* NVidia Cuda Runtime API*/
|
||||
#cmakedefine HAVE_CUDA
|
||||
|
||||
/* The project was generated with 1.0 NVIDIA device arch support */
|
||||
#cmakedefine OPENCV_GPU_CUDA_ARCH_10
|
||||
/* Compile for 'real' NVIDIA GPU architecture */
|
||||
#cmakedefine OPENCV_ARCH_GPU_10
|
||||
#cmakedefine OPENCV_ARCH_GPU_11
|
||||
#cmakedefine OPENCV_ARCH_GPU_12
|
||||
#cmakedefine OPENCV_ARCH_GPU_13
|
||||
#cmakedefine OPENCV_ARCH_GPU_20
|
||||
#cmakedefine OPENCV_ARCH_GPU_21
|
||||
|
||||
/* The project was generated with 1.1 NVIDIA device arch support */
|
||||
#cmakedefine OPENCV_GPU_CUDA_ARCH_11
|
||||
|
||||
/* The project was generated with 1.2 NVIDIA device arch support */
|
||||
#cmakedefine OPENCV_GPU_CUDA_ARCH_12
|
||||
|
||||
/* The project was generated with 1.3 NVIDIA device arch support */
|
||||
#cmakedefine OPENCV_GPU_CUDA_ARCH_13
|
||||
|
||||
/* The project was generated with 2.0 NVIDIA device arch support */
|
||||
#cmakedefine OPENCV_GPU_CUDA_ARCH_20
|
||||
|
||||
/* The project was generated with 2.1 NVIDIA device arch support */
|
||||
#cmakedefine OPENCV_GPU_CUDA_ARCH_21
|
||||
/* Compile for 'virtual' NVIDIA PTX architecture */
|
||||
#cmakedefine OPENCV_ARCH_PTX_10
|
||||
#cmakedefine OPENCV_ARCH_PTX_11
|
||||
#cmakedefine OPENCV_ARCH_PTX_12
|
||||
#cmakedefine OPENCV_ARCH_PTX_13
|
||||
#cmakedefine OPENCV_ARCH_PTX_20
|
||||
#cmakedefine OPENCV_ARCH_PTX_21
|
||||
|
||||
/* VideoInput library */
|
||||
#cmakedefine HAVE_VIDEOINPUT
|
||||
|
@ -69,30 +69,70 @@ Returns true, if the specified GPU has atomics support, otherwise false.
|
||||
\end{description}
|
||||
|
||||
|
||||
\cvCppFunc{gpu::ptxVersionIs}
|
||||
Returns true, if the GPU module was built with PTX support of the given compute capability, otherwise false.
|
||||
\cvCppFunc{gpu::hasPtxVersion}
|
||||
Returns true, if the GPU module has PTX code for the given architecture, otherwise false.
|
||||
|
||||
\cvdefCpp{bool ptxVersionIs(int major, int minor);}
|
||||
\cvdefCpp{bool hasPtxVersion(int major, int minor);}
|
||||
\begin{description}
|
||||
\cvarg{major}{Major compute capability version.}
|
||||
\cvarg{minor}{Minor compute capability version.}
|
||||
\end{description}
|
||||
|
||||
|
||||
\cvCppFunc{gpu::ptxVersionIsLessOrEqual}
|
||||
Returns true, if the GPU module was built with PTX support of the given compute capability or less, otherwise false.
|
||||
\cvCppFunc{gpu::hasLessOrEqualPtxVersion}
|
||||
Returns true, if the GPU module has PTX code for the given architecture or older one, otherwise false.
|
||||
|
||||
\cvdefCpp{bool ptxVersionIsLessOrEqual(int major, int minor);}
|
||||
\cvdefCpp{bool hasLessOrEqualPtxVersion(int major, int minor);}
|
||||
\begin{description}
|
||||
\cvarg{major}{Major compute capability version.}
|
||||
\cvarg{minor}{Minor compute capability version.}
|
||||
\end{description}
|
||||
|
||||
|
||||
\cvCppFunc{gpu::ptxVersionIsGreaterOrEqual}
|
||||
Returns true, if the GPU module was built with PTX support of the given compute capability or greater, otherwise false.
|
||||
\cvCppFunc{gpu::hasGreaterOrEqualPtxVersion}
|
||||
Returns true, if the GPU module has PTX code for the given architecture or newer one, otherwise false.
|
||||
|
||||
\cvdefCpp{bool ptxVersionIsGreaterOrEqual(int major, int minor);}
|
||||
\cvdefCpp{bool hasGreaterOrEqualPtxVersion(int major, int minor);}
|
||||
\begin{description}
|
||||
\cvarg{major}{Major compute capability version.}
|
||||
\cvarg{minor}{Minor compute capability version.}
|
||||
\end{description}
|
||||
|
||||
|
||||
\cvCppFunc{gpu::hasCubinVersion}
|
||||
Returns true, if the GPU module has CUBIN code for the given architecture, otherwise false.
|
||||
|
||||
\cvdefCpp{bool hasCubinVersion(int major, int minor);}
|
||||
\begin{description}
|
||||
\cvarg{major}{Major compute capability version.}
|
||||
\cvarg{minor}{Minor compute capability version.}
|
||||
\end{description}
|
||||
|
||||
|
||||
\cvCppFunc{gpu::hasGreaterOrEqualCubinVersion}
|
||||
Returns true, if the GPU module has CUBIN code for the given architecture or newer one, otherwise false.
|
||||
|
||||
\cvdefCpp{bool hasGreaterOrEqualCubinVersion(int major, int minor);}
|
||||
\begin{description}
|
||||
\cvarg{major}{Major compute capability version.}
|
||||
\cvarg{minor}{Minor compute capability version.}
|
||||
\end{description}
|
||||
|
||||
|
||||
\cvCppFunc{gpu::hasVersion}
|
||||
Returns true, if the GPU module has PTX or CUBIN code for the given architecture, otherwise false.
|
||||
|
||||
\cvdefCpp{bool hasVersion(int major, int minor);}
|
||||
\begin{description}
|
||||
\cvarg{major}{Major compute capability version.}
|
||||
\cvarg{minor}{Minor compute capability version.}
|
||||
\end{description}
|
||||
|
||||
|
||||
\cvCppFunc{gpu::hasGreaterOrEqualVersion}
|
||||
Returns true, if the GPU module has PTX or CUBIN code for the given architecture or newer one, otherwise false.
|
||||
|
||||
\cvdefCpp{bool hasGreaterOrEqualVersion(int major, int minor);}
|
||||
\begin{description}
|
||||
\cvarg{major}{Major compute capability version.}
|
||||
\cvarg{minor}{Minor compute capability version.}
|
||||
@ -100,7 +140,7 @@ Returns true, if the GPU module was built with PTX support of the given compute
|
||||
|
||||
|
||||
\cvCppFunc{gpu::isCompatibleWith}
|
||||
Returns true, if the GPU module is PTX compatible with the given NVIDIA GPU device, otherwise false.
|
||||
Returns true, if the GPU module is PTX or CUBIN compatible with the given GPU device, otherwise false.
|
||||
|
||||
\cvdefCpp{bool isCompatibleWith(int device);}
|
||||
\begin{description}
|
||||
|
@ -72,11 +72,16 @@ namespace cv
|
||||
CV_EXPORTS bool hasNativeDoubleSupport(int device);
|
||||
CV_EXPORTS bool hasAtomicsSupport(int device);
|
||||
|
||||
CV_EXPORTS bool ptxVersionIs(int major, int minor);
|
||||
CV_EXPORTS bool ptxVersionIsLessOrEqual(int major, int minor);
|
||||
CV_EXPORTS bool ptxVersionIsGreaterOrEqual(int major, int minor);
|
||||
CV_EXPORTS bool hasPtxVersion(int major, int minor);
|
||||
CV_EXPORTS bool hasLessOrEqualPtxVersion(int major, int minor);
|
||||
CV_EXPORTS bool hasGreaterOrEqualPtxVersion(int major, int minor);
|
||||
|
||||
CV_EXPORTS bool hasCubinVersion(int major, int minor);
|
||||
CV_EXPORTS bool hasGreaterOrEqualCubinVersion(int major, int minor);
|
||||
|
||||
CV_EXPORTS bool hasVersion(int major, int minor);
|
||||
CV_EXPORTS bool hasGreaterOrEqualVersion(int major, int minor);
|
||||
|
||||
//! Checks if the GPU module is PTX or CUBIN compatible with the given NVIDIA device
|
||||
CV_EXPORTS bool isCompatibleWith(int device);
|
||||
|
||||
//////////////////////////////// Error handling ////////////////////////
|
||||
|
@ -57,9 +57,13 @@ CV_EXPORTS int cv::gpu::getNumberOfSMs(int /*device*/) { throw_nogpu(); return 0
|
||||
CV_EXPORTS void cv::gpu::getGpuMemInfo(size_t& /*free*/, size_t& /*total*/) { throw_nogpu(); }
|
||||
CV_EXPORTS bool cv::gpu::hasNativeDoubleSupport(int /*device*/) { throw_nogpu(); return false; }
|
||||
CV_EXPORTS bool cv::gpu::hasAtomicsSupport(int /*device*/) { throw_nogpu(); return false; }
|
||||
CV_EXPORTS bool cv::gpu::ptxVersionIs(int major, int minor) { throw_nogpu(); return false; }
|
||||
CV_EXPORTS bool cv::gpu::ptxVersionIsLessOrEqual(int major, int minor) { throw_nogpu(); return false; }
|
||||
CV_EXPORTS bool cv::gpu::ptxVersionIsGreaterOrEqual(int major, int minor) { throw_nogpu(); return false; }
|
||||
CV_EXPORTS bool cv::gpu::hasPtxVersion(int major, int minor) { throw_nogpu(); return false; }
|
||||
CV_EXPORTS bool cv::gpu::hasLessOrEqualPtxVersion(int major, int minor) { throw_nogpu(); return false; }
|
||||
CV_EXPORTS bool cv::gpu::hasGreaterOrEqualPtxVersion(int major, int minor) { throw_nogpu(); return false; }
|
||||
CV_EXPORTS bool cv::gpu::hasCubinVersion(int major, int minor) { throw_nogpu(); return false; }
|
||||
CV_EXPORTS bool cv::gpu::hasGreaterOrEqualCubinVersion(int major, int minor) { throw_nogpu(); return false; }
|
||||
CV_EXPORTS bool cv::gpu::hasVersion(int major, int minor) { throw_nogpu(); return false; }
|
||||
CV_EXPORTS bool cv::gpu::hasGreaterOrEqualVersion(int major, int minor) { throw_nogpu(); return false; }
|
||||
CV_EXPORTS bool cv::gpu::isCompatibleWith(int device) { throw_nogpu(); return false; }
|
||||
|
||||
|
||||
@ -140,37 +144,63 @@ namespace
|
||||
template <typename Comparer>
|
||||
bool checkPtxVersion(int major, int minor, Comparer cmp)
|
||||
{
|
||||
#ifdef OPENCV_GPU_CUDA_ARCH_10
|
||||
#ifdef OPENCV_ARCH_PTX_10
|
||||
if (cmp(1, 0, major, minor)) return true;
|
||||
#endif
|
||||
|
||||
#ifdef OPENCV_GPU_CUDA_ARCH_11
|
||||
#ifdef OPENCV_ARCH_PTX_11
|
||||
if (cmp(1, 1, major, minor)) return true;
|
||||
#endif
|
||||
|
||||
#ifdef OPENCV_GPU_CUDA_ARCH_12
|
||||
#ifdef OPENCV_ARCH_PTX_12
|
||||
if (cmp(1, 2, major, minor)) return true;
|
||||
#endif
|
||||
|
||||
#ifdef OPENCV_GPU_CUDA_ARCH_13
|
||||
#ifdef OPENCV_ARCH_PTX_13
|
||||
if (cmp(1, 3, major, minor)) return true;
|
||||
#endif
|
||||
|
||||
#ifdef OPENCV_GPU_CUDA_ARCH_20
|
||||
#ifdef OPENCV_ARCH_PTX_20
|
||||
if (cmp(2, 0, major, minor)) return true;
|
||||
#endif
|
||||
|
||||
#ifdef OPENCV_GPU_CUDA_ARCH_21
|
||||
#ifdef OPENCV_ARCH_PTX_21
|
||||
if (cmp(2, 1, major, minor)) return true;
|
||||
#endif
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Comparer>
|
||||
bool checkCubinVersion(int major, int minor, Comparer cmp)
|
||||
{
|
||||
#ifdef OPENCV_ARCH_GPU_10
|
||||
if (cmp(1, 0, major, minor)) return true;
|
||||
#endif
|
||||
|
||||
#ifdef OPENCV_ARCH_GPU_11
|
||||
if (cmp(1, 1, major, minor)) return true;
|
||||
#endif
|
||||
|
||||
#ifdef OPENCV_ARCH_GPU_12
|
||||
if (cmp(1, 2, major, minor)) return true;
|
||||
#endif
|
||||
|
||||
#ifdef OPENCV_ARCH_GPU_13
|
||||
if (cmp(1, 3, major, minor)) return true;
|
||||
#endif
|
||||
|
||||
#ifdef OPENCV_ARCH_GPU_20
|
||||
if (cmp(2, 0, major, minor)) return true;
|
||||
#endif
|
||||
|
||||
#ifdef OPENCV_ARCH_GPU_21
|
||||
if (cmp(2, 1, major, minor)) return true;
|
||||
#endif
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
CV_EXPORTS bool cv::gpu::ptxVersionIs(int major, int minor)
|
||||
{
|
||||
struct ComparerEqual
|
||||
{
|
||||
bool operator()(int lhs1, int lhs2, int rhs1, int rhs2) const
|
||||
@ -178,12 +208,7 @@ CV_EXPORTS bool cv::gpu::ptxVersionIs(int major, int minor)
|
||||
return lhs1 == rhs1 && lhs2 == rhs2;
|
||||
}
|
||||
};
|
||||
return checkPtxVersion(major, minor, ComparerEqual());
|
||||
}
|
||||
|
||||
|
||||
CV_EXPORTS bool cv::gpu::ptxVersionIsLessOrEqual(int major, int minor)
|
||||
{
|
||||
struct ComparerLessOrEqual
|
||||
{
|
||||
bool operator()(int lhs1, int lhs2, int rhs1, int rhs2) const
|
||||
@ -191,12 +216,7 @@ CV_EXPORTS bool cv::gpu::ptxVersionIsLessOrEqual(int major, int minor)
|
||||
return lhs1 < rhs1 || (lhs1 == rhs1 && lhs2 <= rhs2);
|
||||
}
|
||||
};
|
||||
return checkPtxVersion(major, minor, ComparerLessOrEqual());
|
||||
}
|
||||
|
||||
|
||||
CV_EXPORTS bool cv::gpu::ptxVersionIsGreaterOrEqual(int major, int minor)
|
||||
{
|
||||
struct ComparerGreaterOrEqual
|
||||
{
|
||||
bool operator()(int lhs1, int lhs2, int rhs1, int rhs2) const
|
||||
@ -204,10 +224,52 @@ CV_EXPORTS bool cv::gpu::ptxVersionIsGreaterOrEqual(int major, int minor)
|
||||
return lhs1 > rhs1 || (lhs1 == rhs1 && lhs2 >= rhs2);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
CV_EXPORTS bool cv::gpu::hasPtxVersion(int major, int minor)
|
||||
{
|
||||
return checkPtxVersion(major, minor, ComparerEqual());
|
||||
}
|
||||
|
||||
|
||||
CV_EXPORTS bool cv::gpu::hasLessOrEqualPtxVersion(int major, int minor)
|
||||
{
|
||||
return checkPtxVersion(major, minor, ComparerLessOrEqual());
|
||||
}
|
||||
|
||||
|
||||
CV_EXPORTS bool cv::gpu::hasGreaterOrEqualPtxVersion(int major, int minor)
|
||||
{
|
||||
return checkPtxVersion(major, minor, ComparerGreaterOrEqual());
|
||||
}
|
||||
|
||||
|
||||
CV_EXPORTS bool cv::gpu::hasCubinVersion(int major, int minor)
|
||||
{
|
||||
return checkCubinVersion(major, minor, ComparerEqual());
|
||||
}
|
||||
|
||||
|
||||
CV_EXPORTS bool cv::gpu::hasGreaterOrEqualCubinVersion(int major, int minor)
|
||||
{
|
||||
return checkCubinVersion(major, minor, ComparerGreaterOrEqual());
|
||||
}
|
||||
|
||||
|
||||
CV_EXPORTS bool cv::gpu::hasVersion(int major, int minor)
|
||||
{
|
||||
return hasPtxVersion(major, minor) || hasCubinVersion(major, minor);
|
||||
}
|
||||
|
||||
|
||||
CV_EXPORTS bool cv::gpu::hasGreaterOrEqualVersion(int major, int minor)
|
||||
{
|
||||
return hasGreaterOrEqualPtxVersion(major, minor) ||
|
||||
hasGreaterOrEqualCubinVersion(major, minor);
|
||||
}
|
||||
|
||||
|
||||
CV_EXPORTS bool cv::gpu::isCompatibleWith(int device)
|
||||
{
|
||||
// According to the CUDA C Programming Guide Version 3.2: "PTX code
|
||||
@ -217,7 +279,16 @@ CV_EXPORTS bool cv::gpu::isCompatibleWith(int device)
|
||||
int major, minor;
|
||||
getComputeCapability(device, major, minor);
|
||||
|
||||
return ptxVersionIsLessOrEqual(major, minor);
|
||||
// Check PTX compatibility
|
||||
if (hasLessOrEqualPtxVersion(major, minor))
|
||||
return true;
|
||||
|
||||
// Check CUBIN compatibility
|
||||
for (int i = 0; i <= minor; ++i)
|
||||
if (hasCubinVersion(major, i))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -166,7 +166,7 @@ Scalar cv::gpu::sum(const GpuMat& src, GpuMat& buf)
|
||||
ensureSizeIsEnough(buf_size, CV_8U, buf);
|
||||
|
||||
Caller* callers = multipass_callers;
|
||||
if (ptxVersionIsGreaterOrEqual(1, 1) && hasAtomicsSupport(getDevice()))
|
||||
if (hasGreaterOrEqualVersion(1, 1) && hasAtomicsSupport(getDevice()))
|
||||
callers = singlepass_callers;
|
||||
|
||||
Caller caller = callers[src.depth()];
|
||||
@ -202,7 +202,7 @@ Scalar cv::gpu::sqrSum(const GpuMat& src, GpuMat& buf)
|
||||
sqrSumCaller<int>, sqrSumCaller<float>, 0 };
|
||||
|
||||
Caller* callers = multipass_callers;
|
||||
if (ptxVersionIsGreaterOrEqual(1, 1) && hasAtomicsSupport(getDevice()))
|
||||
if (hasGreaterOrEqualVersion(1, 1) && hasAtomicsSupport(getDevice()))
|
||||
callers = singlepass_callers;
|
||||
|
||||
Size buf_size;
|
||||
@ -289,7 +289,7 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp
|
||||
if (mask.empty())
|
||||
{
|
||||
Caller* callers = multipass_callers;
|
||||
if (ptxVersionIsGreaterOrEqual(1, 1) && hasAtomicsSupport(getDevice()))
|
||||
if (hasGreaterOrEqualVersion(1, 1) && hasAtomicsSupport(getDevice()))
|
||||
callers = singlepass_callers;
|
||||
|
||||
Caller caller = callers[src.type()];
|
||||
@ -299,7 +299,7 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp
|
||||
else
|
||||
{
|
||||
MaskedCaller* callers = masked_multipass_callers;
|
||||
if (ptxVersionIsGreaterOrEqual(1, 1) && hasAtomicsSupport(getDevice()))
|
||||
if (hasGreaterOrEqualVersion(1, 1) && hasAtomicsSupport(getDevice()))
|
||||
callers = masked_singlepass_callers;
|
||||
|
||||
MaskedCaller caller = callers[src.type()];
|
||||
@ -389,7 +389,7 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
|
||||
if (mask.empty())
|
||||
{
|
||||
Caller* callers = multipass_callers;
|
||||
if (ptxVersionIsGreaterOrEqual(1, 1) && hasAtomicsSupport(getDevice()))
|
||||
if (hasGreaterOrEqualVersion(1, 1) && hasAtomicsSupport(getDevice()))
|
||||
callers = singlepass_callers;
|
||||
|
||||
Caller caller = callers[src.type()];
|
||||
@ -399,7 +399,7 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
|
||||
else
|
||||
{
|
||||
MaskedCaller* callers = masked_multipass_callers;
|
||||
if (ptxVersionIsGreaterOrEqual(1, 1) && hasAtomicsSupport(getDevice()))
|
||||
if (hasGreaterOrEqualVersion(1, 1) && hasAtomicsSupport(getDevice()))
|
||||
callers = masked_singlepass_callers;
|
||||
|
||||
MaskedCaller caller = callers[src.type()];
|
||||
@ -459,7 +459,7 @@ int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf)
|
||||
ensureSizeIsEnough(buf_size, CV_8U, buf);
|
||||
|
||||
Caller* callers = multipass_callers;
|
||||
if (ptxVersionIsGreaterOrEqual(1, 1) && hasAtomicsSupport(getDevice()))
|
||||
if (hasGreaterOrEqualVersion(1, 1) && hasAtomicsSupport(getDevice()))
|
||||
callers = singlepass_callers;
|
||||
|
||||
Caller caller = callers[src.type()];
|
||||
|
Loading…
x
Reference in New Issue
Block a user