92 lines
		
	
	
		
			3.4 KiB
		
	
	
	
		
			CMake
		
	
	
	
	
	
			
		
		
	
	
			92 lines
		
	
	
		
			3.4 KiB
		
	
	
	
		
			CMake
		
	
	
	
	
	
find_package(CUDA 4.1)
 | 
						|
 | 
						|
if(CUDA_FOUND)
 | 
						|
  # This branch only runs when CUDA_FOUND is true: record that CUDA is
  # available, plus one HAVE_* flag for each WITH_* switch that is true.
  set(HAVE_CUDA 1)

  if(WITH_CUBLAS)
    set(HAVE_CUBLAS 1)
  endif()

  if(WITH_CUFFT)
    set(HAVE_CUFFT 1)
  endif()

  # Report the toolkit version in the configure log.
  message(STATUS "CUDA detected: " ${CUDA_VERSION})
 | 
						|
 | 
						|
  # Cache entries holding the target architecture lists. Entries are written
  # as "major.minor"; the BIN list also accepts the "BIN(PTX)" form.
  set(CUDA_ARCH_BIN
      "1.1 1.2 1.3 2.0 2.1(2.0)"
      CACHE STRING
      "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported")
  set(CUDA_ARCH_PTX
      "2.0"
      CACHE STRING
      "Specify 'virtual' PTX architectures to build PTX intermediate code for")

  # Copies of both lists with every '.' removed ("2.1(2.0)" -> "21(20)").
  string(REPLACE "." "" ARCH_BIN_NO_POINTS "${CUDA_ARCH_BIN}")
  string(REPLACE "." "" ARCH_PTX_NO_POINTS "${CUDA_ARCH_PTX}")
 | 
						|
 | 
						|
  # Check if user specified 1.0 compute capability: we don't support it.
  #
  # Outputs:
  #   HAS_ARCH_10             - "1.0" when either list names it, "" otherwise
  #   CUDA_ARCH_BIN_OR_PTX_10 - 1 when either list names it, 0 otherwise
  #
  # The dot is escaped and the token must not be adjacent to another digit, so
  # values such as "11.0" or "21.0" are not mistaken for "1.0".
  set(HAS_ARCH_10 "")
  set(CUDA_ARCH_BIN_OR_PTX_10 0)
  if("${CUDA_ARCH_BIN} ${CUDA_ARCH_PTX}" MATCHES "(^|[^0-9])1\\.0([^0-9]|$)")
    set(HAS_ARCH_10 "1.0")
    set(CUDA_ARCH_BIN_OR_PTX_10 1)
  endif()
 | 
						|
 | 
						|
  # Reset every accumulator to the empty string before anything is appended.

  # Values that will be passed into the templates.
  set(OPENCV_CUDA_ARCH_FEATURES "")
  set(OPENCV_CUDA_ARCH_PTX "")
  set(OPENCV_CUDA_ARCH_BIN "")

  # Extra NVCC command-line flags.
  set(NVCC_FLAGS_EXTRA "")
 | 
						|
 | 
						|
  # Tell NVCC to add binaries for the specified GPUs.
  # Each entry of the dot-less BIN list is either "BIN" or "BIN(PTX)".
  string(REGEX MATCHALL "[0-9()]+" ARCH_LIST "${ARCH_BIN_NO_POINTS}")
  foreach(ARCH IN LISTS ARCH_LIST)
    if(ARCH MATCHES "([0-9]+)\\(([0-9]+)\\)")
      # "BIN(PTX)": the user explicitly chose the PTX for this concrete BIN.
      set(_ocv_cuda_sm "${CMAKE_MATCH_1}")
      set(_ocv_cuda_compute "${CMAKE_MATCH_2}")
    else()
      # Plain "BIN": no explicit PTX, so PTX=BIN is assumed.
      set(_ocv_cuda_sm "${ARCH}")
      set(_ocv_cuda_compute "${ARCH}")
    endif()

    list(APPEND NVCC_FLAGS_EXTRA
         -gencode arch=compute_${_ocv_cuda_compute},code=sm_${_ocv_cuda_sm})
    set(OPENCV_CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN} ${_ocv_cuda_sm}")
    set(OPENCV_CUDA_ARCH_FEATURES "${OPENCV_CUDA_ARCH_FEATURES} ${_ocv_cuda_compute}")
  endforeach()
  # Do not leak the per-iteration temporaries.
  unset(_ocv_cuda_sm)
  unset(_ocv_cuda_compute)
 | 
						|
 | 
						|
  # Tell NVCC to add PTX intermediate code for the specified architectures:
  # one "-gencode arch=compute_N,code=compute_N" pair per entry of the
  # dot-less PTX list.
  string(REGEX MATCHALL "[0-9]+" ARCH_LIST "${ARCH_PTX_NO_POINTS}")
  foreach(ARCH IN LISTS ARCH_LIST)
    list(APPEND NVCC_FLAGS_EXTRA
         -gencode arch=compute_${ARCH},code=compute_${ARCH})
    set(OPENCV_CUDA_ARCH_FEATURES "${OPENCV_CUDA_ARCH_FEATURES} ${ARCH}")
    set(OPENCV_CUDA_ARCH_PTX "${OPENCV_CUDA_ARCH_PTX} ${ARCH}")
  endforeach()
 | 
						|
 | 
						|
  # These vars will be processed in other scripts:
  #   OpenCV_CUDA_CC  - only the flags collected in NVCC_FLAGS_EXTRA
  #   CUDA_NVCC_FLAGS - its previous contents followed by NVCC_FLAGS_EXTRA
  set(OpenCV_CUDA_CC "${NVCC_FLAGS_EXTRA}")
  set(CUDA_NVCC_FLAGS
      ${CUDA_NVCC_FLAGS}
      ${NVCC_FLAGS_EXTRA})

  message(STATUS "CUDA NVCC target flags: ${CUDA_NVCC_FLAGS}")

  # Remove the cached CUDA_npp_LIBRARY entry before asking the helper for
  # "npp". NOTE(review): presumably this forces a fresh NPP lookup on every
  # configure run - confirm against find_cuda_helper_libs in FindCUDA.
  unset(CUDA_npp_LIBRARY CACHE)
  find_cuda_helper_libs(npp)
 | 
						|
 | 
						|
  # OCV_CUDA_COMPILE(<VAR> <args>...)
  #
  # Wrapper around CUDA_COMPILE: forwards <VAR> and all remaining arguments
  # to it after adjusting the flags in effect for the call.
  #
  #   - BUILD_SHARED_LIBS: appends "-Xcompiler -DCVAPI_EXPORTS" to CUDA_NVCC_FLAGS
  #   - UNIX or APPLE:     appends "-Xcompiler -fPIC"
  #   - APPLE:             appends "-Xcompiler -fno-finite-math-only"
  #   - "-ggdb3" is removed from CMAKE_CXX_FLAGS_DEBUG only for the duration
  #     of the CUDA_COMPILE call; the original value is put back afterwards.
  #
  # This is a macro, so every variable it sets lives in the caller's scope.
  # NOTE(review): CUDA_NVCC_FLAGS is not restored, so the -Xcompiler options
  # are appended again on every invocation - confirm whether that is intended.
  macro(OCV_CUDA_COMPILE VAR)
    if(BUILD_SHARED_LIBS)
      set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -DCVAPI_EXPORTS)
    endif()

    if(UNIX OR APPLE)
      set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -fPIC)
    endif()
    if(APPLE)
      set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -fno-finite-math-only)
    endif()

    # We remove the -ggdb3 flag as it leads to preprocessor errors when
    # compiling CUDA files (CUDA 4.1). Both expansions are quoted so that an
    # empty CMAKE_CXX_FLAGS_DEBUG does not leave string(REPLACE) short of
    # arguments.
    set(CMAKE_CXX_FLAGS_DEBUG_ "${CMAKE_CXX_FLAGS_DEBUG}")
    string(REPLACE "-ggdb3" "" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}")
    CUDA_COMPILE(${VAR} ${ARGN})
    # Put the caller's debug flags back under their real variable name.
    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG_}")
  endmacro()
 | 
						|
else()
 | 
						|
  # CUDA was not found: remove both architecture lists from the cache.
  unset(CUDA_ARCH_PTX CACHE)
  unset(CUDA_ARCH_BIN CACHE)
 | 
						|
endif()
 |