modified according to CUDA 4.0 API updates
@@ -1,236 +0,0 @@
|
|||||||
set(name "gpu")
|
|
||||||
|
|
||||||
set(the_target "opencv_${name}")
|
|
||||||
project(${the_target})
|
|
||||||
|
|
||||||
set(DEPS "opencv_core" "opencv_imgproc" "opencv_objdetect" "opencv_features2d" "opencv_flann" "opencv_calib3d") #"opencv_features2d" "opencv_flann" "opencv_objdetect" - only headers needed
|
|
||||||
set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} opencv_gpu)
|
|
||||||
|
|
||||||
include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include"
|
|
||||||
"${CMAKE_CURRENT_SOURCE_DIR}/src/cuda"
|
|
||||||
"${CMAKE_CURRENT_SOURCE_DIR}/src"
|
|
||||||
"${CMAKE_CURRENT_BINARY_DIR}")
|
|
||||||
|
|
||||||
file(GLOB lib_srcs "src/*.cpp")
|
|
||||||
file(GLOB lib_int_hdrs "src/*.h*")
|
|
||||||
file(GLOB lib_cuda "src/cuda/*.cu*")
|
|
||||||
file(GLOB lib_cuda_hdrs "src/cuda/*.h*")
|
|
||||||
source_group("Src\\Host" FILES ${lib_srcs} ${lib_int_hdrs})
|
|
||||||
source_group("Src\\Cuda" FILES ${lib_cuda} ${lib_cuda_hdrs})
|
|
||||||
|
|
||||||
file(GLOB lib_hdrs "include/opencv2/${name}/*.h*")
|
|
||||||
source_group("Include" FILES ${lib_hdrs})
|
|
||||||
|
|
||||||
#file(GLOB lib_device_hdrs "include/opencv2/${name}/device/*.h*")
|
|
||||||
file(GLOB lib_device_hdrs "src/opencv2/gpu/device/*.h*")
|
|
||||||
source_group("Device" FILES ${lib_device_hdrs})
|
|
||||||
|
|
||||||
if (HAVE_CUDA)
|
|
||||||
file(GLOB_RECURSE ncv_srcs "src/nvidia/*.cpp")
|
|
||||||
file(GLOB_RECURSE ncv_cuda "src/nvidia/*.cu")
|
|
||||||
file(GLOB_RECURSE ncv_hdrs "src/nvidia/*.hpp" "src/nvidia/*.h")
|
|
||||||
|
|
||||||
source_group("Src\\NVidia" FILES ${ncv_srcs} ${ncv_hdrs} ${ncv_cuda})
|
|
||||||
include_directories("src/nvidia/core" "src/nvidia/NPP_staging")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (HAVE_CUDA)
|
|
||||||
get_filename_component(_path_to_findnpp "${CMAKE_CURRENT_LIST_FILE}" PATH)
|
|
||||||
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${_path_to_findnpp})
|
|
||||||
find_package(NPP 3.2.16 REQUIRED)
|
|
||||||
message(STATUS "NPP detected: " ${NPP_VERSION})
|
|
||||||
|
|
||||||
include_directories(${CUDA_INCLUDE_DIRS} ${CUDA_NPP_INCLUDES})
|
|
||||||
|
|
||||||
if (UNIX OR APPLE)
|
|
||||||
set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler;-fPIC;")
|
|
||||||
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" "-fPIC")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
#set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-keep")
|
|
||||||
#set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler;/EHsc-;")
|
|
||||||
|
|
||||||
string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
|
||||||
string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
|
|
||||||
string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}")
|
|
||||||
|
|
||||||
if(MSVC)
|
|
||||||
#string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
|
||||||
#string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
|
|
||||||
#string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}")
|
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4211 /wd4201 /wd4100 /wd4505 /wd4408")
|
|
||||||
|
|
||||||
string(REPLACE "/EHsc-" "/EHs" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
|
|
||||||
string(REPLACE "/EHsc-" "/EHs" CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE}")
|
|
||||||
string(REPLACE "/EHsc-" "/EHs" CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}")
|
|
||||||
string(REPLACE "/EHsc-" "/EHs" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
|
||||||
string(REPLACE "/EHsc-" "/EHs" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
|
|
||||||
string(REPLACE "/EHsc-" "/EHs" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (OPENCV_BUILD_SHARED_LIB)
|
|
||||||
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler;-DCVAPI_EXPORTS")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
CUDA_COMPILE(cuda_objs ${lib_cuda} ${ncv_cuda})
|
|
||||||
#CUDA_BUILD_CLEAN_TARGET()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
foreach(d ${DEPS})
|
|
||||||
if(${d} MATCHES "opencv_")
|
|
||||||
string(REPLACE "opencv_" "${CMAKE_CURRENT_SOURCE_DIR}/../" d_dir ${d})
|
|
||||||
include_directories("${d_dir}/include")
|
|
||||||
endif()
|
|
||||||
endforeach()
|
|
||||||
|
|
||||||
add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${lib_device_hdrs} ${ncv_srcs} ${ncv_hdrs} ${ncv_cuda} ${cuda_objs})
|
|
||||||
|
|
||||||
if(PCHSupport_FOUND)
|
|
||||||
set(pch_header ${CMAKE_CURRENT_SOURCE_DIR}/src/precomp.hpp)
|
|
||||||
if(${CMAKE_GENERATOR} MATCHES "Visual*" OR ${CMAKE_GENERATOR} MATCHES "Xcode*")
|
|
||||||
if(${CMAKE_GENERATOR} MATCHES "Visual*")
|
|
||||||
set(${the_target}_pch "src/precomp.cpp")
|
|
||||||
endif()
|
|
||||||
add_native_precompiled_header(${the_target} ${pch_header})
|
|
||||||
elseif(CMAKE_COMPILER_IS_GNUCXX AND ${CMAKE_GENERATOR} MATCHES ".*Makefiles")
|
|
||||||
add_precompiled_header(${the_target} ${pch_header})
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# For dynamic link numbering convenions
|
|
||||||
set_target_properties(${the_target} PROPERTIES
|
|
||||||
VERSION ${OPENCV_VERSION}
|
|
||||||
SOVERSION ${OPENCV_SOVERSION}
|
|
||||||
OUTPUT_NAME "${the_target}${OPENCV_DLLVERSION}"
|
|
||||||
)
|
|
||||||
|
|
||||||
if(ENABLE_SOLUTION_FOLDERS)
|
|
||||||
set_target_properties(${the_target} PROPERTIES FOLDER "modules")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (OPENCV_BUILD_SHARED_LIB)
|
|
||||||
if (MSVC)
|
|
||||||
set_target_properties(${the_target} PROPERTIES DEFINE_SYMBOL CVAPI_EXPORTS)
|
|
||||||
else()
|
|
||||||
add_definitions(-DCVAPI_EXPORTS)
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# Additional target properties
|
|
||||||
set_target_properties(${the_target} PROPERTIES
|
|
||||||
DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
|
|
||||||
ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib/"
|
|
||||||
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/"
|
|
||||||
INSTALL_NAME_DIR "${CMAKE_INSTALL_PREFIX}/lib"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Add the required libraries for linking:
|
|
||||||
target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${IPP_LIBS} ${DEPS} )
|
|
||||||
|
|
||||||
if (HAVE_CUDA)
|
|
||||||
target_link_libraries(${the_target} ${CUDA_LIBRARIES} ${CUDA_NPP_LIBRARIES})
|
|
||||||
CUDA_ADD_CUFFT_TO_TARGET(${the_target})
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if(MSVC)
|
|
||||||
if(CMAKE_CROSSCOMPILING)
|
|
||||||
set_target_properties(${the_target} PROPERTIES LINK_FLAGS "/NODEFAULTLIB:secchk")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
set_target_properties(${the_target} PROPERTIES LINK_FLAGS "/NODEFAULTLIB:libc")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# Dependencies of this target:
|
|
||||||
add_dependencies(${the_target} ${DEPS})
|
|
||||||
|
|
||||||
install(TARGETS ${the_target}
|
|
||||||
RUNTIME DESTINATION bin COMPONENT main
|
|
||||||
LIBRARY DESTINATION lib COMPONENT main
|
|
||||||
ARCHIVE DESTINATION lib COMPONENT main)
|
|
||||||
|
|
||||||
install(FILES ${lib_hdrs}
|
|
||||||
DESTINATION include/opencv2/${name}
|
|
||||||
COMPONENT main)
|
|
||||||
|
|
||||||
install(FILES src/nvidia/NPP_staging/NPP_staging.hpp src/nvidia/core/NCV.hpp
|
|
||||||
DESTINATION include/opencv2/${name}
|
|
||||||
COMPONENT main)
|
|
||||||
|
|
||||||
#install(FILES ${lib_device_hdrs}
|
|
||||||
# DESTINATION include/opencv2/${name}/device
|
|
||||||
# COMPONENT main)
|
|
||||||
|
|
||||||
|
|
||||||
################################################################################################################
|
|
||||||
################################ GPU Module Tests #####################################################
|
|
||||||
################################################################################################################
|
|
||||||
|
|
||||||
# Test files processing is in the separated directory to avoid 'Src' source
|
|
||||||
# filter creation in Visual Studio
|
|
||||||
if(BUILD_TESTS AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/test)
|
|
||||||
set(the_test_target "opencv_test_${name}")
|
|
||||||
|
|
||||||
include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include"
|
|
||||||
"${CMAKE_CURRENT_SOURCE_DIR}/test"
|
|
||||||
"${CMAKE_CURRENT_BINARY_DIR}")
|
|
||||||
|
|
||||||
set(test_deps opencv_${name} opencv_ts opencv_highgui opencv_calib3d ${DEPS})
|
|
||||||
foreach(d ${test_deps})
|
|
||||||
if(${d} MATCHES "opencv_")
|
|
||||||
if(${d} MATCHES "opencv_lapack")
|
|
||||||
else()
|
|
||||||
string(REPLACE "opencv_" "${CMAKE_CURRENT_SOURCE_DIR}/../" d_dir ${d})
|
|
||||||
include_directories("${d_dir}/include")
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
endforeach()
|
|
||||||
|
|
||||||
file(GLOB test_srcs "test/*.cpp")
|
|
||||||
file(GLOB test_hdrs "test/*.h*")
|
|
||||||
|
|
||||||
source_group("Src" FILES ${test_hdrs} ${test_srcs})
|
|
||||||
if(HAVE_CUDA)
|
|
||||||
include_directories(${CUDA_INCLUDE_DIRS} ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/core ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/NPP_staging)
|
|
||||||
|
|
||||||
file(GLOB nvidia "test/nvidia/*.cpp" "test/nvidia/*.h*")
|
|
||||||
source_group("Src\\NVidia" FILES ${nvidia})
|
|
||||||
endif()
|
|
||||||
|
|
||||||
|
|
||||||
add_executable(${the_test_target} ${test_srcs} ${test_hdrs} ${nvidia})
|
|
||||||
|
|
||||||
if(PCHSupport_FOUND)
|
|
||||||
set(pch_header ${CMAKE_CURRENT_SOURCE_DIR}/test/test_precomp.hpp)
|
|
||||||
if(${CMAKE_GENERATOR} MATCHES "Visual*" OR ${CMAKE_GENERATOR} MATCHES "Xcode*")
|
|
||||||
if(${CMAKE_GENERATOR} MATCHES "Visual*")
|
|
||||||
set(${the_test_target}_pch "test/test_precomp.cpp")
|
|
||||||
endif()
|
|
||||||
add_native_precompiled_header(${the_test_target} ${pch_header})
|
|
||||||
elseif(CMAKE_COMPILER_IS_GNUCXX AND ${CMAKE_GENERATOR} MATCHES ".*Makefiles")
|
|
||||||
add_precompiled_header(${the_test_target} ${pch_header})
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# Additional target properties
|
|
||||||
set_target_properties(${the_test_target} PROPERTIES
|
|
||||||
DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
|
|
||||||
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/"
|
|
||||||
)
|
|
||||||
|
|
||||||
if(ENABLE_SOLUTION_FOLDERS)
|
|
||||||
set_target_properties(${the_test_target} PROPERTIES FOLDER "tests")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
add_dependencies(${the_test_target} ${test_deps})
|
|
||||||
|
|
||||||
# Add the required libraries for linking:
|
|
||||||
target_link_libraries(${the_test_target} ${OPENCV_LINKER_LIBS} ${test_deps})
|
|
||||||
|
|
||||||
enable_testing()
|
|
||||||
get_target_property(LOC ${the_test_target} LOCATION)
|
|
||||||
add_test(${the_test_target} "${LOC}")
|
|
||||||
|
|
||||||
if(WIN32)
|
|
||||||
install(TARGETS ${the_test_target} RUNTIME DESTINATION bin COMPONENT main)
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
@@ -1,240 +0,0 @@
|
|||||||
set(name "gpu")
|
|
||||||
|
|
||||||
set(the_target "opencv_${name}")
|
|
||||||
project(${the_target})
|
|
||||||
|
|
||||||
set(DEPS "opencv_core" "opencv_imgproc" "opencv_objdetect" "opencv_features2d" "opencv_flann" "opencv_calib3d") #"opencv_features2d" "opencv_flann" "opencv_objdetect" - only headers needed
|
|
||||||
set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} opencv_gpu)
|
|
||||||
|
|
||||||
include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include"
|
|
||||||
"${CMAKE_CURRENT_SOURCE_DIR}/src/cuda"
|
|
||||||
"${CMAKE_CURRENT_SOURCE_DIR}/src"
|
|
||||||
"${CMAKE_CURRENT_BINARY_DIR}")
|
|
||||||
|
|
||||||
file(GLOB lib_srcs "src/*.cpp")
|
|
||||||
file(GLOB lib_int_hdrs "src/*.h*")
|
|
||||||
file(GLOB lib_cuda "src/cuda/*.cu*")
|
|
||||||
file(GLOB lib_cuda_hdrs "src/cuda/*.h*")
|
|
||||||
source_group("Src\\Host" FILES ${lib_srcs} ${lib_int_hdrs})
|
|
||||||
source_group("Src\\Cuda" FILES ${lib_cuda} ${lib_cuda_hdrs})
|
|
||||||
|
|
||||||
file(GLOB lib_hdrs "include/opencv2/${name}/*.h*")
|
|
||||||
source_group("Include" FILES ${lib_hdrs})
|
|
||||||
|
|
||||||
#file(GLOB lib_device_hdrs "include/opencv2/${name}/device/*.h*")
|
|
||||||
file(GLOB lib_device_hdrs "src/opencv2/gpu/device/*.h*")
|
|
||||||
source_group("Device" FILES ${lib_device_hdrs})
|
|
||||||
|
|
||||||
if (HAVE_CUDA)
|
|
||||||
file(GLOB_RECURSE ncv_srcs "src/nvidia/*.cpp")
|
|
||||||
file(GLOB_RECURSE ncv_cuda "src/nvidia/*.cu")
|
|
||||||
file(GLOB_RECURSE ncv_hdrs "src/nvidia/*.hpp" "src/nvidia/*.h")
|
|
||||||
|
|
||||||
source_group("Src\\NVidia" FILES ${ncv_srcs} ${ncv_hdrs} ${ncv_cuda})
|
|
||||||
include_directories("src/nvidia/core" "src/nvidia/NPP_staging")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (HAVE_CUDA)
|
|
||||||
#get_filename_component(_path_to_findnpp "${CMAKE_CURRENT_LIST_FILE}" PATH)
|
|
||||||
#set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${_path_to_findnpp})
|
|
||||||
#find_package(NPP 3.2.16 REQUIRED)
|
|
||||||
#message(STATUS "NPP detected: " ${NPP_VERSION})
|
|
||||||
|
|
||||||
include_directories(${CUDA_INCLUDE_DIRS})
|
|
||||||
|
|
||||||
if (UNIX OR APPLE)
|
|
||||||
set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler;-fPIC;")
|
|
||||||
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" "-fPIC")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
#set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-keep")
|
|
||||||
#set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler;/EHsc-;")
|
|
||||||
|
|
||||||
string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
|
||||||
string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
|
|
||||||
string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}")
|
|
||||||
|
|
||||||
if(MSVC)
|
|
||||||
#string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
|
||||||
#string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
|
|
||||||
#string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}")
|
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4211 /wd4201 /wd4100 /wd4505 /wd4408")
|
|
||||||
|
|
||||||
string(REPLACE "/EHsc-" "/EHs" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
|
|
||||||
string(REPLACE "/EHsc-" "/EHs" CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE}")
|
|
||||||
string(REPLACE "/EHsc-" "/EHs" CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}")
|
|
||||||
string(REPLACE "/EHsc-" "/EHs" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
|
||||||
string(REPLACE "/EHsc-" "/EHs" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
|
|
||||||
string(REPLACE "/EHsc-" "/EHs" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (OPENCV_BUILD_SHARED_LIB)
|
|
||||||
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler;-DCVAPI_EXPORTS")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
CUDA_COMPILE(cuda_objs ${lib_cuda} ${ncv_cuda})
|
|
||||||
#CUDA_BUILD_CLEAN_TARGET()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
foreach(d ${DEPS})
|
|
||||||
if(${d} MATCHES "opencv_")
|
|
||||||
string(REPLACE "opencv_" "${CMAKE_CURRENT_SOURCE_DIR}/../" d_dir ${d})
|
|
||||||
include_directories("${d_dir}/include")
|
|
||||||
endif()
|
|
||||||
endforeach()
|
|
||||||
|
|
||||||
add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${lib_device_hdrs} ${ncv_srcs} ${ncv_hdrs} ${ncv_cuda} ${cuda_objs})
|
|
||||||
|
|
||||||
if(PCHSupport_FOUND)
|
|
||||||
set(pch_header ${CMAKE_CURRENT_SOURCE_DIR}/src/precomp.hpp)
|
|
||||||
if(${CMAKE_GENERATOR} MATCHES "Visual*" OR ${CMAKE_GENERATOR} MATCHES "Xcode*")
|
|
||||||
if(${CMAKE_GENERATOR} MATCHES "Visual*")
|
|
||||||
set(${the_target}_pch "src/precomp.cpp")
|
|
||||||
endif()
|
|
||||||
add_native_precompiled_header(${the_target} ${pch_header})
|
|
||||||
elseif(CMAKE_COMPILER_IS_GNUCXX AND ${CMAKE_GENERATOR} MATCHES ".*Makefiles")
|
|
||||||
add_precompiled_header(${the_target} ${pch_header})
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# For dynamic link numbering convenions
|
|
||||||
set_target_properties(${the_target} PROPERTIES
|
|
||||||
VERSION ${OPENCV_VERSION}
|
|
||||||
SOVERSION ${OPENCV_SOVERSION}
|
|
||||||
OUTPUT_NAME "${the_target}${OPENCV_DLLVERSION}"
|
|
||||||
)
|
|
||||||
|
|
||||||
if(ENABLE_SOLUTION_FOLDERS)
|
|
||||||
set_target_properties(${the_target} PROPERTIES FOLDER "modules")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (OPENCV_BUILD_SHARED_LIB)
|
|
||||||
if (MSVC)
|
|
||||||
set_target_properties(${the_target} PROPERTIES DEFINE_SYMBOL CVAPI_EXPORTS)
|
|
||||||
else()
|
|
||||||
add_definitions(-DCVAPI_EXPORTS)
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# Additional target properties
|
|
||||||
set_target_properties(${the_target} PROPERTIES
|
|
||||||
DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
|
|
||||||
ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib/"
|
|
||||||
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/"
|
|
||||||
INSTALL_NAME_DIR "${CMAKE_INSTALL_PREFIX}/lib"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Add the required libraries for linking:
|
|
||||||
target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${IPP_LIBS} ${DEPS} )
|
|
||||||
|
|
||||||
if (HAVE_CUDA)
|
|
||||||
target_link_libraries(${the_target} ${CUDA_LIBRARIES})
|
|
||||||
CUDA_ADD_CUFFT_TO_TARGET(${the_target})
|
|
||||||
|
|
||||||
unset(CUDA_npp_LIBRARY CACHE)
|
|
||||||
find_cuda_helper_libs(npp)
|
|
||||||
target_link_libraries(${the_target} ${CUDA_npp_LIBRARY})
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if(MSVC)
|
|
||||||
if(CMAKE_CROSSCOMPILING)
|
|
||||||
set_target_properties(${the_target} PROPERTIES LINK_FLAGS "/NODEFAULTLIB:secchk")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
set_target_properties(${the_target} PROPERTIES LINK_FLAGS "/NODEFAULTLIB:libc")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# Dependencies of this target:
|
|
||||||
add_dependencies(${the_target} ${DEPS})
|
|
||||||
|
|
||||||
install(TARGETS ${the_target}
|
|
||||||
RUNTIME DESTINATION bin COMPONENT main
|
|
||||||
LIBRARY DESTINATION lib COMPONENT main
|
|
||||||
ARCHIVE DESTINATION lib COMPONENT main)
|
|
||||||
|
|
||||||
install(FILES ${lib_hdrs}
|
|
||||||
DESTINATION include/opencv2/${name}
|
|
||||||
COMPONENT main)
|
|
||||||
|
|
||||||
install(FILES src/nvidia/NPP_staging/NPP_staging.hpp src/nvidia/core/NCV.hpp
|
|
||||||
DESTINATION include/opencv2/${name}
|
|
||||||
COMPONENT main)
|
|
||||||
|
|
||||||
#install(FILES ${lib_device_hdrs}
|
|
||||||
# DESTINATION include/opencv2/${name}/device
|
|
||||||
# COMPONENT main)
|
|
||||||
|
|
||||||
|
|
||||||
################################################################################################################
|
|
||||||
################################ GPU Module Tests #####################################################
|
|
||||||
################################################################################################################
|
|
||||||
|
|
||||||
# Test files processing is in the separated directory to avoid 'Src' source
|
|
||||||
# filter creation in Visual Studio
|
|
||||||
if(BUILD_TESTS AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/test)
|
|
||||||
set(the_test_target "opencv_test_${name}")
|
|
||||||
|
|
||||||
include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include"
|
|
||||||
"${CMAKE_CURRENT_SOURCE_DIR}/test"
|
|
||||||
"${CMAKE_CURRENT_BINARY_DIR}")
|
|
||||||
|
|
||||||
set(test_deps opencv_${name} opencv_ts opencv_highgui opencv_calib3d ${DEPS})
|
|
||||||
foreach(d ${test_deps})
|
|
||||||
if(${d} MATCHES "opencv_")
|
|
||||||
if(${d} MATCHES "opencv_lapack")
|
|
||||||
else()
|
|
||||||
string(REPLACE "opencv_" "${CMAKE_CURRENT_SOURCE_DIR}/../" d_dir ${d})
|
|
||||||
include_directories("${d_dir}/include")
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
endforeach()
|
|
||||||
|
|
||||||
file(GLOB test_srcs "test/*.cpp")
|
|
||||||
file(GLOB test_hdrs "test/*.h*")
|
|
||||||
|
|
||||||
source_group("Src" FILES ${test_hdrs} ${test_srcs})
|
|
||||||
if(HAVE_CUDA)
|
|
||||||
include_directories(${CUDA_INCLUDE_DIRS} ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/core ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/NPP_staging)
|
|
||||||
|
|
||||||
file(GLOB nvidia "test/nvidia/*.cpp" "test/nvidia/*.h*")
|
|
||||||
source_group("Src\\NVidia" FILES ${nvidia})
|
|
||||||
endif()
|
|
||||||
|
|
||||||
|
|
||||||
add_executable(${the_test_target} ${test_srcs} ${test_hdrs} ${nvidia})
|
|
||||||
|
|
||||||
if(PCHSupport_FOUND)
|
|
||||||
set(pch_header ${CMAKE_CURRENT_SOURCE_DIR}/test/test_precomp.hpp)
|
|
||||||
if(${CMAKE_GENERATOR} MATCHES "Visual*" OR ${CMAKE_GENERATOR} MATCHES "Xcode*")
|
|
||||||
if(${CMAKE_GENERATOR} MATCHES "Visual*")
|
|
||||||
set(${the_test_target}_pch "test/test_precomp.cpp")
|
|
||||||
endif()
|
|
||||||
add_native_precompiled_header(${the_test_target} ${pch_header})
|
|
||||||
elseif(CMAKE_COMPILER_IS_GNUCXX AND ${CMAKE_GENERATOR} MATCHES ".*Makefiles")
|
|
||||||
add_precompiled_header(${the_test_target} ${pch_header})
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# Additional target properties
|
|
||||||
set_target_properties(${the_test_target} PROPERTIES
|
|
||||||
DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
|
|
||||||
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/"
|
|
||||||
)
|
|
||||||
|
|
||||||
if(ENABLE_SOLUTION_FOLDERS)
|
|
||||||
set_target_properties(${the_test_target} PROPERTIES FOLDER "tests")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
add_dependencies(${the_test_target} ${test_deps})
|
|
||||||
|
|
||||||
# Add the required libraries for linking:
|
|
||||||
target_link_libraries(${the_test_target} ${OPENCV_LINKER_LIBS} ${test_deps})
|
|
||||||
|
|
||||||
enable_testing()
|
|
||||||
get_target_property(LOC ${the_test_target} LOCATION)
|
|
||||||
add_test(${the_test_target} "${LOC}")
|
|
||||||
|
|
||||||
if(WIN32)
|
|
||||||
install(TARGETS ${the_test_target} RUNTIME DESTINATION bin COMPONENT main)
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
@@ -1,125 +0,0 @@
|
|||||||
###############################################################################
|
|
||||||
#
|
|
||||||
# FindNPP.cmake
|
|
||||||
#
|
|
||||||
# CUDA_NPP_LIBRARY_ROOT_DIR -- Path to the NPP dorectory.
|
|
||||||
# CUDA_NPP_INCLUDES -- NPP Include directories.
|
|
||||||
# CUDA_NPP_LIBRARIES -- NPP libraries.
|
|
||||||
# NPP_VERSION -- NPP version in format "major.minor.build".
|
|
||||||
#
|
|
||||||
# If not found automatically, please set CUDA_NPP_LIBRARY_ROOT_DIR
|
|
||||||
# in CMake or set enviroment varivabe $CUDA_NPP_ROOT
|
|
||||||
#
|
|
||||||
# Author: Anatoly Baksheev, Itseez Ltd.
|
|
||||||
#
|
|
||||||
# The MIT License
|
|
||||||
#
|
|
||||||
# License for the specific language governing rights and limitations under
|
|
||||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
||||||
# copy of this software and associated documentation files (the "Software"),
|
|
||||||
# to deal in the Software without restriction, including without limitation
|
|
||||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
||||||
# and/or sell copies of the Software, and to permit persons to whom the
|
|
||||||
# Software is furnished to do so, subject to the following conditions:
|
|
||||||
#
|
|
||||||
# The above copyright notice and this permission notice shall be included
|
|
||||||
# in all copies or substantial portions of the Software.
|
|
||||||
#
|
|
||||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
||||||
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
||||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
||||||
# DEALINGS IN THE SOFTWARE.
|
|
||||||
#
|
|
||||||
###############################################################################
|
|
||||||
|
|
||||||
cmake_policy(PUSH)
|
|
||||||
cmake_minimum_required(VERSION 2.8.0)
|
|
||||||
cmake_policy(POP)
|
|
||||||
|
|
||||||
if(NOT "${CUDA_NPP_LIBRARY_ROOT_DIR}" STREQUAL "${CUDA_NPP_LIBRARY_ROOT_DIR_INTERNAL}")
|
|
||||||
unset(CUDA_NPP_INCLUDES CACHE)
|
|
||||||
unset(CUDA_NPP_LIBRARIES CACHE)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if(CMAKE_SIZEOF_VOID_P EQUAL 4)
|
|
||||||
if (UNIX OR APPLE)
|
|
||||||
set(NPP_SUFFIX "32")
|
|
||||||
else()
|
|
||||||
set(NPP_SUFFIX "-mt")
|
|
||||||
endif()
|
|
||||||
else(CMAKE_SIZEOF_VOID_P EQUAL 4)
|
|
||||||
if (UNIX OR APPLE)
|
|
||||||
set(NPP_SUFFIX "64")
|
|
||||||
else()
|
|
||||||
set(NPP_SUFFIX "-mt-x64")
|
|
||||||
endif()
|
|
||||||
endif(CMAKE_SIZEOF_VOID_P EQUAL 4)
|
|
||||||
|
|
||||||
if(NOT CUDA_NPP_LIBRARY_ROOT_DIR OR CUDA_NPP_LIBRARY_ROOT_DIR STREQUAL "")
|
|
||||||
unset(CUDA_NPP_LIBRARY_ROOT_DIR CACHE)
|
|
||||||
find_path(CUDA_NPP_LIBRARY_ROOT_DIR "common/npp/include/npp.h" PATHS ENV CUDA_NPP_ROOT DOC "NPP root directory.")
|
|
||||||
MESSAGE(STATUS "NPP root directory: " ${CUDA_NPP_LIBRARY_ROOT_DIR})
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# Search includes in our own paths.
|
|
||||||
find_path(CUDA_NPP_INCLUDES npp.h PATHS "${CUDA_NPP_LIBRARY_ROOT_DIR}/common/npp/include")
|
|
||||||
# Search default search paths, after we search our own set of paths.
|
|
||||||
find_path(CUDA_NPP_INCLUDES device_functions.h)
|
|
||||||
mark_as_advanced(CUDA_NPP_INCLUDES)
|
|
||||||
|
|
||||||
# Find NPP library
|
|
||||||
find_library(CUDA_NPP_LIBRARIES
|
|
||||||
NAMES "npp" "npp${NPP_SUFFIX}" "libnpp${NPP_SUFFIX}"
|
|
||||||
PATHS "${CUDA_NPP_LIBRARY_ROOT_DIR}"
|
|
||||||
PATH_SUFFIXES "common/lib" "common/npp/lib"
|
|
||||||
DOC "NPP library"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Search default search paths, after we search our own set of paths.
|
|
||||||
find_library(CUDA_NPP_LIBRARIES NAMES npp${NPP_SUFFIX} libnpp${NPP_SUFFIX} DOC "NPP library")
|
|
||||||
mark_as_advanced(CUDA_NPP_LIBRARIES)
|
|
||||||
|
|
||||||
if(EXISTS ${CUDA_NPP_INCLUDES}/nppversion.h)
|
|
||||||
file( STRINGS ${CUDA_NPP_INCLUDES}/nppversion.h npp_major REGEX "#define NPP_VERSION_MAJOR.*")
|
|
||||||
file( STRINGS ${CUDA_NPP_INCLUDES}/nppversion.h npp_minor REGEX "#define NPP_VERSION_MINOR.*")
|
|
||||||
file( STRINGS ${CUDA_NPP_INCLUDES}/nppversion.h npp_build REGEX "#define NPP_VERSION_BUILD.*")
|
|
||||||
|
|
||||||
string( REGEX REPLACE "#define NPP_VERSION_MAJOR[ \t]+|//.*" "" npp_major ${npp_major})
|
|
||||||
string( REGEX REPLACE "#define NPP_VERSION_MINOR[ \t]+|//.*" "" npp_minor ${npp_minor})
|
|
||||||
string( REGEX REPLACE "#define NPP_VERSION_BUILD[ \t]+|//.*" "" npp_build ${npp_build})
|
|
||||||
|
|
||||||
string( REGEX MATCH "[0-9]+" npp_major ${npp_major} )
|
|
||||||
string( REGEX MATCH "[0-9]+" npp_minor ${npp_minor} )
|
|
||||||
string( REGEX MATCH "[0-9]+" npp_build ${npp_build} )
|
|
||||||
set( NPP_VERSION "${npp_major}.${npp_minor}.${npp_build}")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if(NOT EXISTS ${CUDA_NPP_LIBRARIES} OR NOT EXISTS ${CUDA_NPP_INCLUDES}/npp.h)
|
|
||||||
set(CUDA_NPP_FOUND FALSE)
|
|
||||||
message(FATAL_ERROR "NPP headers/libraries are not found. Please specify CUDA_NPP_LIBRARY_ROOT_DIR in CMake or set $CUDA_NPP_ROOT.")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
include( FindPackageHandleStandardArgs )
|
|
||||||
find_package_handle_standard_args( NPP
|
|
||||||
REQUIRED_VARS
|
|
||||||
CUDA_NPP_INCLUDES
|
|
||||||
CUDA_NPP_LIBRARIES
|
|
||||||
#Need cmake 2.8.3 to uncomment this.
|
|
||||||
#VERSION_VAR
|
|
||||||
NPP_VERSION)
|
|
||||||
|
|
||||||
if(APPLE)
|
|
||||||
# We need to add the path to cudart to the linker using rpath, since the library name for the cuda libraries is prepended with @rpath.
|
|
||||||
get_filename_component(_cuda_path_to_npp "${CUDA_NPP_LIBRARIES}" PATH)
|
|
||||||
if(_cuda_path_to_npp)
|
|
||||||
list(APPEND CUDA_NPP_LIBRARIES "-Wl,-rpath,${_cuda_path_to_npp}")
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
set(CUDA_NPP_FOUND TRUE)
|
|
||||||
set(CUDA_NPP_LIBRARY_ROOT_DIR_INTERNAL "${CUDA_NPP_LIBRARY_ROOT_DIR}" CACHE INTERNAL "This is the value of the last time CUDA_NPP_LIBRARY_ROOT_DIR was set successfully." FORCE)
|
|
||||||
|
|
||||||
|
|
||||||
@@ -447,12 +447,21 @@ namespace cv
         // converts matrix type, ex from float to uchar depending on type
         void enqueueConvert(const GpuMat& src, GpuMat& dst, int type, double a = 1, double b = 0);

+        static Stream& Null();
+
+        operator bool() const;
+
     private:
         void create();
         void release();

         struct Impl;
         Impl *impl;

         friend struct StreamAccessor;
+
+        explicit Stream(Impl* impl);
     };
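The hunk above folds the old blocking/asynchronous split into the Stream class itself: Stream::Null() is a sentinel standing for the default (synchronous) CUDA stream, and operator bool() reports whether the object wraps a real user stream. A minimal sketch of the dispatch idiom this enables — the helper name convertOn and the CV_32F target type are illustrative only, while enqueueConvert is declared in the hunk and GpuMat::convertTo is the pre-existing synchronous API:

    // Sketch only: one defaulted Stream& parameter serves both paths.
    void convertOn(const cv::gpu::GpuMat& src, cv::gpu::GpuMat& dst,
                   cv::gpu::Stream& stream = cv::gpu::Stream::Null())
    {
        if (stream)                                  // operator bool(): a real user stream
            stream.enqueueConvert(src, dst, CV_32F); // asynchronous enqueue (declared above)
        else
            src.convertTo(dst, CV_32F);              // synchronous fallback
    }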
@@ -460,168 +469,130 @@ namespace cv
         //! transposes the matrix
         //! supports matrix with element size = 1, 4 and 8 bytes (CV_8UC1, CV_8UC4, CV_16UC2, CV_32FC1, etc)
-        CV_EXPORTS void transpose(const GpuMat& src1, GpuMat& dst);
+        CV_EXPORTS void transpose(const GpuMat& src1, GpuMat& dst, Stream& stream = Stream::Null());

         //! reverses the order of the rows, columns or both in a matrix
         //! supports CV_8UC1, CV_8UC4 types
-        CV_EXPORTS void flip(const GpuMat& a, GpuMat& b, int flipCode);
+        CV_EXPORTS void flip(const GpuMat& a, GpuMat& b, int flipCode, Stream& stream = Stream::Null());

         //! transforms 8-bit unsigned integers using lookup table: dst(i)=lut(src(i))
         //! destination array will have the same depth as lut and the same number of channels as source
         //! supports CV_8UC1, CV_8UC3 types
-        CV_EXPORTS void LUT(const GpuMat& src, const Mat& lut, GpuMat& dst);
+        CV_EXPORTS void LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& stream = Stream::Null());

         //! makes multi-channel array out of several single-channel arrays
-        CV_EXPORTS void merge(const GpuMat* src, size_t n, GpuMat& dst);
+        CV_EXPORTS void merge(const GpuMat* src, size_t n, GpuMat& dst, Stream& stream = Stream::Null());

         //! makes multi-channel array out of several single-channel arrays
-        CV_EXPORTS void merge(const vector<GpuMat>& src, GpuMat& dst);
+        CV_EXPORTS void merge(const vector<GpuMat>& src, GpuMat& dst, Stream& stream = Stream::Null());

-        //! makes multi-channel array out of several single-channel arrays (async version)
-        CV_EXPORTS void merge(const GpuMat* src, size_t n, GpuMat& dst, const Stream& stream);
-
-        //! makes multi-channel array out of several single-channel arrays (async version)
-        CV_EXPORTS void merge(const vector<GpuMat>& src, GpuMat& dst, const Stream& stream);
-
         //! copies each plane of a multi-channel array to a dedicated array
-        CV_EXPORTS void split(const GpuMat& src, GpuMat* dst);
+        CV_EXPORTS void split(const GpuMat& src, GpuMat* dst, Stream& stream = Stream::Null());

         //! copies each plane of a multi-channel array to a dedicated array
-        CV_EXPORTS void split(const GpuMat& src, vector<GpuMat>& dst);
+        CV_EXPORTS void split(const GpuMat& src, vector<GpuMat>& dst, Stream& stream = Stream::Null());

-        //! copies each plane of a multi-channel array to a dedicated array (async version)
-        CV_EXPORTS void split(const GpuMat& src, GpuMat* dst, const Stream& stream);
-
-        //! copies each plane of a multi-channel array to a dedicated array (async version)
-        CV_EXPORTS void split(const GpuMat& src, vector<GpuMat>& dst, const Stream& stream);
-
         //! computes magnitude of complex (x(i).re, x(i).im) vector
         //! supports only CV_32FC2 type
-        CV_EXPORTS void magnitude(const GpuMat& x, GpuMat& magnitude);
+        CV_EXPORTS void magnitude(const GpuMat& x, GpuMat& magnitude, Stream& stream = Stream::Null());

         //! computes squared magnitude of complex (x(i).re, x(i).im) vector
         //! supports only CV_32FC2 type
-        CV_EXPORTS void magnitudeSqr(const GpuMat& x, GpuMat& magnitude);
+        CV_EXPORTS void magnitudeSqr(const GpuMat& x, GpuMat& magnitude, Stream& stream = Stream::Null());

         //! computes magnitude of each (x(i), y(i)) vector
         //! supports only floating-point source
-        CV_EXPORTS void magnitude(const GpuMat& x, const GpuMat& y, GpuMat& magnitude);
-        //! async version
-        CV_EXPORTS void magnitude(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, const Stream& stream);
+        CV_EXPORTS void magnitude(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, Stream& stream = Stream::Null());

         //! computes squared magnitude of each (x(i), y(i)) vector
         //! supports only floating-point source
-        CV_EXPORTS void magnitudeSqr(const GpuMat& x, const GpuMat& y, GpuMat& magnitude);
-        //! async version
-        CV_EXPORTS void magnitudeSqr(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, const Stream& stream);
+        CV_EXPORTS void magnitudeSqr(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, Stream& stream = Stream::Null());

         //! computes angle (angle(i)) of each (x(i), y(i)) vector
         //! supports only floating-point source
-        CV_EXPORTS void phase(const GpuMat& x, const GpuMat& y, GpuMat& angle, bool angleInDegrees = false);
-        //! async version
-        CV_EXPORTS void phase(const GpuMat& x, const GpuMat& y, GpuMat& angle, bool angleInDegrees, const Stream& stream);
+        CV_EXPORTS void phase(const GpuMat& x, const GpuMat& y, GpuMat& angle, bool angleInDegrees = false, Stream& stream = Stream::Null());

         //! converts Cartesian coordinates to polar
         //! supports only floating-point source
-        CV_EXPORTS void cartToPolar(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, GpuMat& angle, bool angleInDegrees = false);
-        //! async version
-        CV_EXPORTS void cartToPolar(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, GpuMat& angle, bool angleInDegrees, const Stream& stream);
+        CV_EXPORTS void cartToPolar(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, GpuMat& angle, bool angleInDegrees = false, Stream& stream = Stream::Null());

         //! converts polar coordinates to Cartesian
         //! supports only floating-point source
-        CV_EXPORTS void polarToCart(const GpuMat& magnitude, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees = false);
-        //! async version
-        CV_EXPORTS void polarToCart(const GpuMat& magnitude, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees, const Stream& stream);
+        CV_EXPORTS void polarToCart(const GpuMat& magnitude, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees = false, Stream& stream = Stream::Null());


         //////////////////////////// Per-element operations ////////////////////////////////////

         //! adds one matrix to another (c = a + b)
         //! supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1 types
-        CV_EXPORTS void add(const GpuMat& a, const GpuMat& b, GpuMat& c);
+        CV_EXPORTS void add(const GpuMat& a, const GpuMat& b, GpuMat& c, Stream& stream = Stream::Null());
         //! adds scalar to a matrix (c = a + s)
         //! supports CV_32FC1 and CV_32FC2 type
-        CV_EXPORTS void add(const GpuMat& a, const Scalar& sc, GpuMat& c);
+        CV_EXPORTS void add(const GpuMat& a, const Scalar& sc, GpuMat& c, Stream& stream = Stream::Null());

         //! subtracts one matrix from another (c = a - b)
         //! supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1 types
-        CV_EXPORTS void subtract(const GpuMat& a, const GpuMat& b, GpuMat& c);
+        CV_EXPORTS void subtract(const GpuMat& a, const GpuMat& b, GpuMat& c, Stream& stream = Stream::Null());
         //! subtracts scalar from a matrix (c = a - s)
         //! supports CV_32FC1 and CV_32FC2 type
-        CV_EXPORTS void subtract(const GpuMat& a, const Scalar& sc, GpuMat& c);
+        CV_EXPORTS void subtract(const GpuMat& a, const Scalar& sc, GpuMat& c, Stream& stream = Stream::Null());

         //! computes element-wise product of the two arrays (c = a * b)
         //! supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1 types
-        CV_EXPORTS void multiply(const GpuMat& a, const GpuMat& b, GpuMat& c);
+        CV_EXPORTS void multiply(const GpuMat& a, const GpuMat& b, GpuMat& c, Stream& stream = Stream::Null());
         //! multiplies matrix by a scalar (c = a * s)
         //! supports CV_32FC1 and CV_32FC2 type
-        CV_EXPORTS void multiply(const GpuMat& a, const Scalar& sc, GpuMat& c);
+        CV_EXPORTS void multiply(const GpuMat& a, const Scalar& sc, GpuMat& c, Stream& stream = Stream::Null());

         //! computes element-wise quotient of the two arrays (c = a / b)
         //! supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1 types
-        CV_EXPORTS void divide(const GpuMat& a, const GpuMat& b, GpuMat& c);
+        CV_EXPORTS void divide(const GpuMat& a, const GpuMat& b, GpuMat& c, Stream& stream = Stream::Null());
         //! computes element-wise quotient of matrix and scalar (c = a / s)
         //! supports CV_32FC1 and CV_32FC2 type
-        CV_EXPORTS void divide(const GpuMat& a, const Scalar& sc, GpuMat& c);
+        CV_EXPORTS void divide(const GpuMat& a, const Scalar& sc, GpuMat& c, Stream& stream = Stream::Null());

         //! computes exponent of each matrix element (b = e**a)
         //! supports only CV_32FC1 type
-        CV_EXPORTS void exp(const GpuMat& a, GpuMat& b);
+        CV_EXPORTS void exp(const GpuMat& a, GpuMat& b, Stream& stream = Stream::Null());

         //! computes natural logarithm of absolute value of each matrix element: b = log(abs(a))
         //! supports only CV_32FC1 type
-        CV_EXPORTS void log(const GpuMat& a, GpuMat& b);
+        CV_EXPORTS void log(const GpuMat& a, GpuMat& b, Stream& stream = Stream::Null());

         //! computes element-wise absolute difference of two arrays (c = abs(a - b))
         //! supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1 types
-        CV_EXPORTS void absdiff(const GpuMat& a, const GpuMat& b, GpuMat& c);
+        CV_EXPORTS void absdiff(const GpuMat& a, const GpuMat& b, GpuMat& c, Stream& stream = Stream::Null());
         //! computes element-wise absolute difference of array and scalar (c = abs(a - s))
         //! supports only CV_32FC1 type
-        CV_EXPORTS void absdiff(const GpuMat& a, const Scalar& s, GpuMat& c);
+        CV_EXPORTS void absdiff(const GpuMat& a, const Scalar& s, GpuMat& c, Stream& stream = Stream::Null());

         //! compares elements of two arrays (c = a <cmpop> b)
         //! supports CV_8UC4, CV_32FC1 types
-        CV_EXPORTS void compare(const GpuMat& a, const GpuMat& b, GpuMat& c, int cmpop);
+        CV_EXPORTS void compare(const GpuMat& a, const GpuMat& b, GpuMat& c, int cmpop, Stream& stream = Stream::Null());

         //! performs per-element bit-wise inversion
-        CV_EXPORTS void bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask=GpuMat());
-        //! async version
-        CV_EXPORTS void bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask, const Stream& stream);
+        CV_EXPORTS void bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null());

         //! calculates per-element bit-wise disjunction of two arrays
-        CV_EXPORTS void bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat());
-        //! async version
-        CV_EXPORTS void bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, const Stream& stream);
+        CV_EXPORTS void bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null());

         //! calculates per-element bit-wise conjunction of two arrays
-        CV_EXPORTS void bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat());
-        //! async version
-        CV_EXPORTS void bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, const Stream& stream);
+        CV_EXPORTS void bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null());

         //! calculates per-element bit-wise "exclusive or" operation
-        CV_EXPORTS void bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat());
-        //! async version
-        CV_EXPORTS void bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, const Stream& stream);
+        CV_EXPORTS void bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null());

         //! computes per-element minimum of two arrays (dst = min(src1, src2))
-        CV_EXPORTS void min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst);
-        //! Async version
-        CV_EXPORTS void min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Stream& stream);
+        CV_EXPORTS void min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null());

         //! computes per-element minimum of array and scalar (dst = min(src1, src2))
-        CV_EXPORTS void min(const GpuMat& src1, double src2, GpuMat& dst);
-        //! Async version
-        CV_EXPORTS void min(const GpuMat& src1, double src2, GpuMat& dst, const Stream& stream);
+        CV_EXPORTS void min(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream = Stream::Null());

         //! computes per-element maximum of two arrays (dst = max(src1, src2))
-        CV_EXPORTS void max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst);
-        //! Async version
-        CV_EXPORTS void max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Stream& stream);
+        CV_EXPORTS void max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null());

         //! computes per-element maximum of array and scalar (dst = max(src1, src2))
-        CV_EXPORTS void max(const GpuMat& src1, double src2, GpuMat& dst);
-        //! Async version
-        CV_EXPORTS void max(const GpuMat& src1, double src2, GpuMat& dst, const Stream& stream);
+        CV_EXPORTS void max(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream = Stream::Null());


         ////////////////////////////// Image processing //////////////////////////////
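With the hunk above, every per-element operation gains a trailing Stream& stream = Stream::Null() parameter and the separate const Stream& async overloads disappear: one declaration now covers both the blocking and the enqueued call. A short usage sketch, assuming Stream::waitForCompletion() from the wider Stream interface (it is not part of the hunks shown here):

    cv::gpu::GpuMat a, b, sum, diff;       // assume a and b were uploaded elsewhere
    cv::gpu::Stream stream;

    cv::gpu::add(a, b, sum, stream);       // enqueued on the user stream, returns at once
    cv::gpu::subtract(a, b, diff, stream); // ordered after the add on the same stream
    stream.waitForCompletion();            // block until both kernels finish

    cv::gpu::add(a, b, sum);               // same entry point, synchronous via Stream::Null()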
@@ -645,68 +616,60 @@ namespace cv
         //! Does coloring of disparity image: [0..ndisp) -> [0..240, 1, 1] in HSV.
         //! Supported types of input disparity: CV_8U, CV_16S.
         //! Output disparity has CV_8UC4 type in BGRA format (alpha = 255).
-        CV_EXPORTS void drawColorDisp(const GpuMat& src_disp, GpuMat& dst_disp, int ndisp);
-        //! async version
-        CV_EXPORTS void drawColorDisp(const GpuMat& src_disp, GpuMat& dst_disp, int ndisp, const Stream& stream);
+        CV_EXPORTS void drawColorDisp(const GpuMat& src_disp, GpuMat& dst_disp, int ndisp, Stream& stream = Stream::Null());

         //! Reprojects disparity image to 3D space.
         //! Supports CV_8U and CV_16S types of input disparity.
         //! The output is a 4-channel floating-point (CV_32FC4) matrix.
         //! Each element of this matrix will contain the 3D coordinates of the point (x,y,z,1), computed from the disparity map.
         //! Q is the 4x4 perspective transformation matrix that can be obtained with cvStereoRectify.
-        CV_EXPORTS void reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q);
-        //! async version
-        CV_EXPORTS void reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, const Stream& stream);
+        CV_EXPORTS void reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, Stream& stream = Stream::Null());

         //! converts image from one color space to another
-        CV_EXPORTS void cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn = 0);
-        //! async version
-        CV_EXPORTS void cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn, const Stream& stream);
+        CV_EXPORTS void cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn = 0, Stream& stream = Stream::Null());

         //! applies fixed threshold to the image
-        CV_EXPORTS double threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxval, int type);
-        //! async version
-        CV_EXPORTS double threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxval, int type, const Stream& stream);
+        CV_EXPORTS double threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxval, int type, Stream& stream = Stream::Null());

         //! resizes the image
         //! Supports INTER_NEAREST, INTER_LINEAR
         //! supports CV_8UC1, CV_8UC4 types
-        CV_EXPORTS void resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx=0, double fy=0, int interpolation = INTER_LINEAR);
+        CV_EXPORTS void resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx=0, double fy=0, int interpolation = INTER_LINEAR, Stream& stream = Stream::Null());

         //! warps the image using affine transformation
         //! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
-        CV_EXPORTS void warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags = INTER_LINEAR);
+        CV_EXPORTS void warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags = INTER_LINEAR, Stream& stream = Stream::Null());

         //! warps the image using perspective transformation
         //! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
-        CV_EXPORTS void warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags = INTER_LINEAR);
+        CV_EXPORTS void warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags = INTER_LINEAR, Stream& stream = Stream::Null());

         //! rotates 8-bit single or four channel image
         //! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
         //! supports CV_8UC1, CV_8UC4 types
-        CV_EXPORTS void rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift = 0, double yShift = 0, int interpolation = INTER_LINEAR);
+        CV_EXPORTS void rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift = 0, double yShift = 0, int interpolation = INTER_LINEAR, Stream& stream = Stream::Null());

         //! copies 2D array to a larger destination array and pads borders with user-specifiable constant
         //! supports CV_8UC1, CV_8UC4, CV_32SC1 and CV_32FC1 types
-        CV_EXPORTS void copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, const Scalar& value = Scalar());
+        CV_EXPORTS void copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, const Scalar& value = Scalar(), Stream& stream = Stream::Null());

         //! computes the integral image
         //! sum will have CV_32S type, but will contain unsigned int values
         //! supports only CV_8UC1 source type
-        CV_EXPORTS void integral(const GpuMat& src, GpuMat& sum);
+        CV_EXPORTS void integral(const GpuMat& src, GpuMat& sum, Stream& stream = Stream::Null());

         //! buffered version
-        CV_EXPORTS void integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer);
+        CV_EXPORTS void integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, Stream& stream = Stream::Null());

         //! computes the integral image and integral for the squared image
         //! sum will have CV_32S type, sqsum - CV_32F type
         //! supports only CV_8UC1 source type
-        CV_EXPORTS void integral(const GpuMat& src, GpuMat& sum, GpuMat& sqsum);
+        CV_EXPORTS void integral(const GpuMat& src, GpuMat& sum, GpuMat& sqsum, Stream& stream = Stream::Null());

         //! computes squared integral image
         //! result matrix will have 64F type, but will contain 64U values
         //! supports source images of 8UC1 type only
-        CV_EXPORTS void sqrIntegral(const GpuMat& src, GpuMat& sqsum);
+        CV_EXPORTS void sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& stream = Stream::Null());

         //! computes vertical sum, supports only CV_32FC1 images
         CV_EXPORTS void columnSum(const GpuMat& src, GpuMat& sum);
@@ -714,14 +677,7 @@ namespace cv
         //! computes the standard deviation of integral images
         //! supports only CV_32SC1 source type and CV_32FC1 sqr type
         //! output will have CV_32FC1 type
-        CV_EXPORTS void rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect);
+        CV_EXPORTS void rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& stream = Stream::Null());

-        // applies Canny edge detector and produces the edge map
-        // disabled until the crash is fixed
-        //CV_EXPORTS void Canny(const GpuMat& image, GpuMat& edges, double threshold1, double threshold2, int apertureSize = 3);
-        //CV_EXPORTS void Canny(const GpuMat& image, GpuMat& edges, GpuMat& buffer, double threshold1, double threshold2, int apertureSize = 3);
-        //CV_EXPORTS void Canny(const GpuMat& srcDx, const GpuMat& srcDy, GpuMat& edges, double threshold1, double threshold2, int apertureSize = 3);
-        //CV_EXPORTS void Canny(const GpuMat& srcDx, const GpuMat& srcDy, GpuMat& edges, GpuMat& buffer, double threshold1, double threshold2, int apertureSize = 3);
-
         //! computes Harris cornerness criteria at each image pixel
         CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k, int borderType=BORDER_REFLECT101);
@@ -792,7 +748,7 @@ namespace cv
|
|||||||
//! performs linear blending of two images
|
//! performs linear blending of two images
|
||||||
//! to avoid accuracy errors sum of weigths shouldn't be very close to zero
|
//! to avoid accuracy errors sum of weigths shouldn't be very close to zero
|
||||||
CV_EXPORTS void blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2,
|
CV_EXPORTS void blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2,
|
||||||
GpuMat& result);
|
GpuMat& result, Stream& stream = Stream::Null());
|
||||||
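A sketch of blending two same-sized, same-type images with constant per-pixel weights (the 0.3/0.7 split is arbitrary; note the weight sum stays well away from zero, per the comment above; requires opencv2/gpu/gpu.hpp):

    void blend_demo(const cv::gpu::GpuMat& img1, const cv::gpu::GpuMat& img2)
    {
        cv::gpu::GpuMat w1(img1.size(), CV_32FC1, cv::Scalar(0.3f));
        cv::gpu::GpuMat w2(img1.size(), CV_32FC1, cv::Scalar(0.7f));
        cv::gpu::GpuMat result;
        cv::gpu::Stream stream;
        cv::gpu::blendLinear(img1, img2, w1, w2, result, stream);
        stream.waitForCompletion();
    }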
|
|
||||||
////////////////////////////// Matrix reductions //////////////////////////////
|
////////////////////////////// Matrix reductions //////////////////////////////
|
||||||
|
|
||||||
@@ -863,17 +819,11 @@ namespace cv
|
|||||||
///////////////////////////// Calibration 3D //////////////////////////////////
|
///////////////////////////// Calibration 3D //////////////////////////////////
|
||||||
|
|
||||||
CV_EXPORTS void transformPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec,
|
CV_EXPORTS void transformPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec,
|
||||||
GpuMat& dst);
|
GpuMat& dst, Stream& stream = Stream::Null());
|
||||||
|
|
||||||
CV_EXPORTS void transformPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec,
|
|
||||||
GpuMat& dst, const Stream& stream);
|
|
||||||
|
|
||||||
CV_EXPORTS void projectPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec,
|
|
||||||
const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst);
|
|
||||||
|
|
||||||
CV_EXPORTS void projectPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec,
|
CV_EXPORTS void projectPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec,
|
||||||
const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst,
|
const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst,
|
||||||
const Stream& stream);
|
Stream& stream = Stream::Null());
|
||||||
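With the old const Stream& overloads folded into a single defaulted signature, asynchronous point projection looks like the sketch below (argument layouts follow the declarations above; concrete contents are placeholders):

    void project_demo(const cv::gpu::GpuMat& object_pts,    // e.g. 1xN CV_32FC3
                      const cv::Mat& rvec, const cv::Mat& tvec,
                      const cv::Mat& camera_mat, const cv::Mat& dist_coef)
    {
        cv::gpu::GpuMat image_pts;
        cv::gpu::Stream stream;
        cv::gpu::projectPoints(object_pts, rvec, tvec, camera_mat, dist_coef,
                               image_pts, stream);          // async on the given stream
        stream.waitForCompletion();
    }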
|
|
||||||
CV_EXPORTS void solvePnPRansac(const Mat& object, const Mat& image, const Mat& camera_mat,
|
CV_EXPORTS void solvePnPRansac(const Mat& object, const Mat& image, const Mat& camera_mat,
|
||||||
const Mat& dist_coef, Mat& rvec, Mat& tvec, bool use_extrinsic_guess=false,
|
const Mat& dist_coef, Mat& rvec, Mat& tvec, bool use_extrinsic_guess=false,
|
||||||
@@ -893,7 +843,7 @@ namespace cv
|
|||||||
public:
|
public:
|
||||||
BaseRowFilter_GPU(int ksize_, int anchor_) : ksize(ksize_), anchor(anchor_) {}
|
BaseRowFilter_GPU(int ksize_, int anchor_) : ksize(ksize_), anchor(anchor_) {}
|
||||||
virtual ~BaseRowFilter_GPU() {}
|
virtual ~BaseRowFilter_GPU() {}
|
||||||
virtual void operator()(const GpuMat& src, GpuMat& dst) = 0;
|
virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) = 0;
|
||||||
int ksize, anchor;
|
int ksize, anchor;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -908,7 +858,7 @@ namespace cv
|
|||||||
public:
|
public:
|
||||||
BaseColumnFilter_GPU(int ksize_, int anchor_) : ksize(ksize_), anchor(anchor_) {}
|
BaseColumnFilter_GPU(int ksize_, int anchor_) : ksize(ksize_), anchor(anchor_) {}
|
||||||
virtual ~BaseColumnFilter_GPU() {}
|
virtual ~BaseColumnFilter_GPU() {}
|
||||||
virtual void operator()(const GpuMat& src, GpuMat& dst) = 0;
|
virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) = 0;
|
||||||
int ksize, anchor;
|
int ksize, anchor;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -922,7 +872,7 @@ namespace cv
|
|||||||
public:
|
public:
|
||||||
BaseFilter_GPU(const Size& ksize_, const Point& anchor_) : ksize(ksize_), anchor(anchor_) {}
|
BaseFilter_GPU(const Size& ksize_, const Point& anchor_) : ksize(ksize_), anchor(anchor_) {}
|
||||||
virtual ~BaseFilter_GPU() {}
|
virtual ~BaseFilter_GPU() {}
|
||||||
virtual void operator()(const GpuMat& src, GpuMat& dst) = 0;
|
virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) = 0;
|
||||||
Size ksize;
|
Size ksize;
|
||||||
Point anchor;
|
Point anchor;
|
||||||
};
|
};
|
||||||
@@ -938,7 +888,7 @@ namespace cv
|
|||||||
public:
|
public:
|
||||||
virtual ~FilterEngine_GPU() {}
|
virtual ~FilterEngine_GPU() {}
|
||||||
|
|
||||||
virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1)) = 0;
|
virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1), Stream& stream = Stream::Null()) = 0;
|
||||||
};
|
};
|
||||||
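Filter engines created by the factories below inherit the same convention: apply takes the stream last, after the ROI. A usage sketch, assuming createBoxFilter_GPU is one such factory returning a FilterEngine_GPU (treat the exact factory name as an assumption following this module's conventions):

    void filter_engine_demo(const cv::gpu::GpuMat& src, cv::gpu::GpuMat& dst)  // src: CV_8UC4
    {
        cv::gpu::Stream stream;
        cv::Ptr<cv::gpu::FilterEngine_GPU> engine =
            cv::gpu::createBoxFilter_GPU(CV_8UC4, CV_8UC4, cv::Size(5, 5));    // assumed factory
        engine->apply(src, dst, cv::Rect(0, 0, -1, -1), stream);  // default ROI = whole image
        stream.waitForCompletion();
    }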
|
|
||||||
//! returns the non-separable filter engine with the specified filter
|
//! returns the non-separable filter engine with the specified filter
|
||||||
@@ -1027,47 +977,47 @@ namespace cv
|
|||||||
|
|
||||||
//! smooths the image using the normalized box filter
|
//! smooths the image using the normalized box filter
|
||||||
//! supports CV_8UC1, CV_8UC4 types
|
//! supports CV_8UC1, CV_8UC4 types
|
||||||
CV_EXPORTS void boxFilter(const GpuMat& src, GpuMat& dst, int ddepth, Size ksize, Point anchor = Point(-1,-1));
|
CV_EXPORTS void boxFilter(const GpuMat& src, GpuMat& dst, int ddepth, Size ksize, Point anchor = Point(-1,-1), Stream& stream = Stream::Null());
|
||||||
|
|
||||||
//! a synonym for normalized box filter
|
//! a synonym for normalized box filter
|
||||||
static inline void blur(const GpuMat& src, GpuMat& dst, Size ksize, Point anchor = Point(-1,-1)) { boxFilter(src, dst, -1, ksize, anchor); }
|
static inline void blur(const GpuMat& src, GpuMat& dst, Size ksize, Point anchor = Point(-1,-1), Stream& stream = Stream::Null()) { boxFilter(src, dst, -1, ksize, anchor, stream); }
|
||||||
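Since blur simply forwards to boxFilter, the stream threads straight through. A sketch (requires opencv2/gpu/gpu.hpp):

    void blur_demo(const cv::gpu::GpuMat& src, cv::gpu::GpuMat& dst)   // src: CV_8UC1 or CV_8UC4
    {
        cv::gpu::Stream stream;
        cv::gpu::blur(src, dst, cv::Size(3, 3), cv::Point(-1, -1), stream);
        // equivalent to: cv::gpu::boxFilter(src, dst, -1, cv::Size(3, 3), cv::Point(-1, -1), stream);
        stream.waitForCompletion();
    }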
|
|
||||||
//! erodes the image (applies the local minimum operator)
|
//! erodes the image (applies the local minimum operator)
|
||||||
CV_EXPORTS void erode( const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1);
|
CV_EXPORTS void erode( const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1, Stream& stream = Stream::Null());
|
||||||
|
|
||||||
//! dilates the image (applies the local maximum operator)
|
//! dilates the image (applies the local maximum operator)
|
||||||
CV_EXPORTS void dilate( const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1);
|
CV_EXPORTS void dilate( const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1, Stream& stream = Stream::Null());
|
||||||
|
|
||||||
//! applies an advanced morphological operation to the image
|
//! applies an advanced morphological operation to the image
|
||||||
CV_EXPORTS void morphologyEx( const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1);
|
CV_EXPORTS void morphologyEx( const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1, Stream& stream = Stream::Null());
|
||||||
|
|
||||||
//! applies non-separable 2D linear filter to the image
|
//! applies non-separable 2D linear filter to the image
|
||||||
CV_EXPORTS void filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernel, Point anchor=Point(-1,-1));
|
CV_EXPORTS void filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernel, Point anchor=Point(-1,-1), Stream& stream = Stream::Null());
|
||||||
|
|
||||||
//! applies separable 2D linear filter to the image
|
//! applies separable 2D linear filter to the image
|
||||||
CV_EXPORTS void sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY,
|
CV_EXPORTS void sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY,
|
||||||
Point anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
|
Point anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null());
|
||||||
|
|
||||||
//! applies generalized Sobel operator to the image
|
//! applies generalized Sobel operator to the image
|
||||||
CV_EXPORTS void Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1,
|
CV_EXPORTS void Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1,
|
||||||
int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
|
int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null());
|
||||||
|
|
||||||
//! applies the vertical or horizontal Scharr operator to the image
|
//! applies the vertical or horizontal Scharr operator to the image
|
||||||
CV_EXPORTS void Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, double scale = 1,
|
CV_EXPORTS void Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, double scale = 1,
|
||||||
int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
|
int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null());
|
||||||
|
|
||||||
//! smooths the image using a Gaussian filter.
|
//! smooths the image using a Gaussian filter.
|
||||||
CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double sigma1, double sigma2 = 0,
|
CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double sigma1, double sigma2 = 0,
|
||||||
int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
|
int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null());
|
||||||
|
|
||||||
//! applies Laplacian operator to the image
|
//! applies Laplacian operator to the image
|
||||||
//! supports only ksize = 1 and ksize = 3
|
//! supports only ksize = 1 and ksize = 3
|
||||||
CV_EXPORTS void Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize = 1, double scale = 1);
|
CV_EXPORTS void Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize = 1, double scale = 1, Stream& stream = Stream::Null());
|
||||||
|
|
||||||
//////////////////////////////// Image Labeling ////////////////////////////////
|
//////////////////////////////// Image Labeling ////////////////////////////////
|
||||||
|
|
||||||
//! performs labeling via graph cuts
|
//! performs labeling via graph cuts
|
||||||
CV_EXPORTS void graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels, GpuMat& buf);
|
CV_EXPORTS void graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels, GpuMat& buf, Stream& stream = Stream::Null());
|
||||||
|
|
||||||
////////////////////////////////// Histograms //////////////////////////////////
|
////////////////////////////////// Histograms //////////////////////////////////
|
||||||
|
|
||||||
@@ -1076,23 +1026,23 @@ namespace cv
|
|||||||
//! Calculates histogram with evenly distributed bins for single channel source.
|
//! Calculates histogram with evenly distributed bins for single channel source.
|
||||||
//! Supports CV_8UC1, CV_16UC1 and CV_16SC1 source types.
|
//! Supports CV_8UC1, CV_16UC1 and CV_16SC1 source types.
|
||||||
//! Output hist will have one row and histSize cols and CV_32SC1 type.
|
//! Output hist will have one row and histSize cols and CV_32SC1 type.
|
||||||
CV_EXPORTS void histEven(const GpuMat& src, GpuMat& hist, int histSize, int lowerLevel, int upperLevel);
|
CV_EXPORTS void histEven(const GpuMat& src, GpuMat& hist, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null());
|
||||||
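A sketch of a 256-bin grayscale histogram with the new trailing stream (bounds [0, 256) cover the full 8-bit range; requires opencv2/gpu/gpu.hpp):

    void hist_demo(const cv::gpu::GpuMat& src)       // src: CV_8UC1
    {
        cv::gpu::GpuMat hist;                        // comes back as 1 x 256, CV_32SC1
        cv::gpu::Stream stream;
        cv::gpu::histEven(src, hist, 256, 0, 256, stream);
        stream.waitForCompletion();
        cv::Mat hist_cpu;
        hist.download(hist_cpu);                     // download for CPU-side inspection
    }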
//! Calculates histogram with evenly distributed bins for four-channel source.
|
//! Calculates histogram with evenly distributed bins for four-channel source.
|
||||||
//! All channels of source are processed separately.
|
//! All channels of source are processed separately.
|
||||||
//! Supports CV_8UC4, CV_16UC4 and CV_16SC4 source types.
|
//! Supports CV_8UC4, CV_16UC4 and CV_16SC4 source types.
|
||||||
//! Output hist[i] will have one row and histSize[i] cols and CV_32SC1 type.
|
//! Output hist[i] will have one row and histSize[i] cols and CV_32SC1 type.
|
||||||
CV_EXPORTS void histEven(const GpuMat& src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4]);
|
CV_EXPORTS void histEven(const GpuMat& src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null());
|
||||||
//! Calculates histogram with bins determined by levels array.
|
//! Calculates histogram with bins determined by levels array.
|
||||||
//! levels must have one row, with CV_32SC1 type if the source has an integer type and CV_32FC1 otherwise.
|
//! levels must have one row, with CV_32SC1 type if the source has an integer type and CV_32FC1 otherwise.
|
||||||
//! Supports CV_8UC1, CV_16UC1, CV_16SC1 and CV_32FC1 source types.
|
//! Supports CV_8UC1, CV_16UC1, CV_16SC1 and CV_32FC1 source types.
|
||||||
//! Output hist will have one row and (levels.cols-1) cols and CV_32SC1 type.
|
//! Output hist will have one row and (levels.cols-1) cols and CV_32SC1 type.
|
||||||
CV_EXPORTS void histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels);
|
CV_EXPORTS void histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, Stream& stream = Stream::Null());
|
||||||
//! Calculates histogram with bins determined by levels array.
|
//! Calculates histogram with bins determined by levels array.
|
||||||
//! All levels must have one row, with CV_32SC1 type if the source has an integer type and CV_32FC1 otherwise.
|
//! All levels must have one row, with CV_32SC1 type if the source has an integer type and CV_32FC1 otherwise.
|
||||||
//! All channels of source are processed separately.
|
//! All channels of source are processed separately.
|
||||||
//! Supports CV_8UC4, CV_16UC4, CV_16SC4 and CV_32FC4 source types.
|
//! Supports CV_8UC4, CV_16UC4, CV_16SC4 and CV_32FC4 source types.
|
||||||
//! Output hist[i] will have one row and (levels[i].cols-1) cols and CV_32SC1 type.
|
//! Output hist[i] will have one row and (levels[i].cols-1) cols and CV_32SC1 type.
|
||||||
CV_EXPORTS void histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4]);
|
CV_EXPORTS void histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], Stream& stream = Stream::Null());
|
||||||
|
|
||||||
//////////////////////////////// StereoBM_GPU ////////////////////////////////
|
//////////////////////////////// StereoBM_GPU ////////////////////////////////
|
||||||
|
|
||||||
@@ -1110,10 +1060,7 @@ namespace cv
|
|||||||
|
|
||||||
//! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair
|
//! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair
|
||||||
//! Output disparity has CV_8U type.
|
//! Output disparity has CV_8U type.
|
||||||
void operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity);
|
void operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream = Stream::Null());
|
||||||
|
|
||||||
//! async version
|
|
||||||
void operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const Stream & stream);
|
|
||||||
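With the async overload removed, one operator() covers both modes; omitting the stream reproduces the old blocking call. A sketch (default-constructed matcher parameters are assumed adequate):

    void stereo_demo(const cv::gpu::GpuMat& left, const cv::gpu::GpuMat& right)
    {
        cv::gpu::StereoBM_GPU bm;            // default preset and ndisp
        cv::gpu::GpuMat disparity;           // produced as CV_8U
        cv::gpu::Stream stream;
        bm(left, right, disparity, stream);  // async; bm(left, right, disparity) would block
        stream.waitForCompletion();
    }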
|
|
||||||
//! Some heuristics that try to estimate
|
//! Some heuristics that try to estimate
|
||||||
// whether the current GPU will be faster than the CPU for this algorithm.
|
// whether the current GPU will be faster than the CPU for this algorithm.
|
||||||
@@ -1165,15 +1112,11 @@ namespace cv
|
|||||||
|
|
||||||
//! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair,
|
//! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair,
|
||||||
//! if disparity is empty, the output type will be CV_16S; otherwise the output type will be disparity.type().
|
//! if disparity is empty, the output type will be CV_16S; otherwise the output type will be disparity.type().
|
||||||
void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity);
|
void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream = Stream::Null());
|
||||||
|
|
||||||
//! async version
|
|
||||||
void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream);
|
|
||||||
|
|
||||||
|
|
||||||
//! version for user specified data term
|
//! version for user specified data term
|
||||||
void operator()(const GpuMat& data, GpuMat& disparity);
|
void operator()(const GpuMat& data, GpuMat& disparity, Stream& stream = Stream::Null());
|
||||||
void operator()(const GpuMat& data, GpuMat& disparity, Stream& stream);
|
|
||||||
|
|
||||||
int ndisp;
|
int ndisp;
|
||||||
|
|
||||||
@@ -1194,7 +1137,7 @@ namespace cv
|
|||||||
|
|
||||||
/////////////////////////// StereoConstantSpaceBP ///////////////////////////
|
/////////////////////////// StereoConstantSpaceBP ///////////////////////////
|
||||||
// "A Constant-Space Belief Propagation Algorithm for Stereo Matching"
|
// "A Constant-Space Belief Propagation Algorithm for Stereo Matching"
|
||||||
// Qingxiong Yang, Liang Wang, Narendra Ahuja
|
// Qingxiong Yang, Liang Wang, Narendra Ahuja
|
||||||
// http://vision.ai.uiuc.edu/~qyang6/
|
// http://vision.ai.uiuc.edu/~qyang6/
|
||||||
|
|
||||||
class CV_EXPORTS StereoConstantSpaceBP
|
class CV_EXPORTS StereoConstantSpaceBP
|
||||||
@@ -1224,10 +1167,7 @@ namespace cv
|
|||||||
|
|
||||||
//! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair,
|
//! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair,
|
||||||
//! if disparity is empty, the output type will be CV_16S; otherwise the output type will be disparity.type().
|
//! if disparity is empty, the output type will be CV_16S; otherwise the output type will be disparity.type().
|
||||||
void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity);
|
void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream = Stream::Null());
|
||||||
|
|
||||||
//! async version
|
|
||||||
void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream);
|
|
||||||
|
|
||||||
int ndisp;
|
int ndisp;
|
||||||
|
|
||||||
@@ -1280,10 +1220,7 @@ namespace cv
|
|||||||
|
|
||||||
//! the disparity map refinement operator. Refines the disparity map using joint bilateral filtering, given a single color image.
|
//! the disparity map refinement operator. Refines the disparity map using joint bilateral filtering, given a single color image.
|
||||||
//! disparity must have CV_8U or CV_16S type, image must have CV_8UC1 or CV_8UC3 type.
|
//! disparity must have CV_8U or CV_16S type, image must have CV_8UC1 or CV_8UC3 type.
|
||||||
void operator()(const GpuMat& disparity, const GpuMat& image, GpuMat& dst);
|
void operator()(const GpuMat& disparity, const GpuMat& image, GpuMat& dst, Stream& stream = Stream::Null());
|
||||||
|
|
||||||
//! async version
|
|
||||||
void operator()(const GpuMat& disparity, const GpuMat& image, GpuMat& dst, Stream& stream);
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
int ndisp;
|
int ndisp;
|
||||||
@@ -1406,7 +1343,7 @@ namespace cv
|
|||||||
// distance.at<float>(0, queryIdx) will contain distance
|
// distance.at<float>(0, queryIdx) will contain distance
|
||||||
void matchSingle(const GpuMat& queryDescs, const GpuMat& trainDescs,
|
void matchSingle(const GpuMat& queryDescs, const GpuMat& trainDescs,
|
||||||
GpuMat& trainIdx, GpuMat& distance,
|
GpuMat& trainIdx, GpuMat& distance,
|
||||||
const GpuMat& mask = GpuMat());
|
const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null());
|
||||||
|
|
||||||
// Download trainIdx and distance to CPU vector with DMatch
|
// Download trainIdx and distance to CPU vector with DMatch
|
||||||
static void matchDownload(const GpuMat& trainIdx, const GpuMat& distance, std::vector<DMatch>& matches);
|
static void matchDownload(const GpuMat& trainIdx, const GpuMat& distance, std::vector<DMatch>& matches);
|
||||||
@@ -1425,7 +1362,7 @@ namespace cv
|
|||||||
// distance.at<float>(0, queryIdx) will contain distance
|
// distance.at<float>(0, queryIdx) will contain distance
|
||||||
void matchCollection(const GpuMat& queryDescs, const GpuMat& trainCollection,
|
void matchCollection(const GpuMat& queryDescs, const GpuMat& trainCollection,
|
||||||
GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance,
|
GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance,
|
||||||
const GpuMat& maskCollection);
|
const GpuMat& maskCollection, Stream& stream = Stream::Null());
|
||||||
|
|
||||||
// Download trainIdx, imgIdx and distance to CPU vector with DMatch
|
// Download trainIdx, imgIdx and distance to CPU vector with DMatch
|
||||||
static void matchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance,
|
static void matchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance,
|
||||||
@@ -1443,7 +1380,7 @@ namespace cv
|
|||||||
// allDist.at<float>(queryIdx, trainIdx) will contain FLT_MAX if trainIdx is one of the k best;
|
// allDist.at<float>(queryIdx, trainIdx) will contain FLT_MAX if trainIdx is one of the k best;
|
||||||
// otherwise it will contain the distance between the queryIdx and trainIdx descriptors
|
// otherwise it will contain the distance between the queryIdx and trainIdx descriptors
|
||||||
void knnMatch(const GpuMat& queryDescs, const GpuMat& trainDescs,
|
void knnMatch(const GpuMat& queryDescs, const GpuMat& trainDescs,
|
||||||
GpuMat& trainIdx, GpuMat& distance, GpuMat& allDist, int k, const GpuMat& mask = GpuMat());
|
GpuMat& trainIdx, GpuMat& distance, GpuMat& allDist, int k, const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null());
|
||||||
|
|
||||||
// Download trainIdx and distance to CPU vector with DMatch
|
// Download trainIdx and distance to CPU vector with DMatch
|
||||||
// compactResult is used when mask is not empty. If compactResult is false matches
|
// compactResult is used when mask is not empty. If compactResult is false matches
|
||||||
@@ -1478,7 +1415,7 @@ namespace cv
|
|||||||
// Matches are not sorted.
|
// Matches are not sorted.
|
||||||
void radiusMatch(const GpuMat& queryDescs, const GpuMat& trainDescs,
|
void radiusMatch(const GpuMat& queryDescs, const GpuMat& trainDescs,
|
||||||
GpuMat& trainIdx, GpuMat& nMatches, GpuMat& distance, float maxDistance,
|
GpuMat& trainIdx, GpuMat& nMatches, GpuMat& distance, float maxDistance,
|
||||||
const GpuMat& mask = GpuMat());
|
const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null());
|
||||||
|
|
||||||
// Download trainIdx, nMatches and distance to CPU vector with DMatch.
|
// Download trainIdx, nMatches and distance to CPU vector with DMatch.
|
||||||
// matches will be sorted in increasing order of distances.
|
// matches will be sorted in increasing order of distances.
|
||||||
|
|||||||
@@ -48,37 +48,36 @@ using namespace std;
|
|||||||
|
|
||||||
#if !defined (HAVE_CUDA)
|
#if !defined (HAVE_CUDA)
|
||||||
|
|
||||||
void cv::gpu::transpose(const GpuMat&, GpuMat&) { throw_nogpu(); }
|
void cv::gpu::transpose(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||||
void cv::gpu::flip(const GpuMat&, GpuMat&, int) { throw_nogpu(); }
|
void cv::gpu::flip(const GpuMat&, GpuMat&, int, Stream&) { throw_nogpu(); }
|
||||||
void cv::gpu::LUT(const GpuMat&, const Mat&, GpuMat&) { throw_nogpu(); }
|
void cv::gpu::LUT(const GpuMat&, const Mat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||||
void cv::gpu::exp(const GpuMat&, GpuMat&) { throw_nogpu(); }
|
void cv::gpu::exp(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||||
void cv::gpu::log(const GpuMat&, GpuMat&) { throw_nogpu(); }
|
void cv::gpu::log(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||||
void cv::gpu::magnitude(const GpuMat&, GpuMat&) { throw_nogpu(); }
|
void cv::gpu::magnitude(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||||
void cv::gpu::magnitudeSqr(const GpuMat&, GpuMat&) { throw_nogpu(); }
|
void cv::gpu::magnitudeSqr(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||||
void cv::gpu::magnitude(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
|
void cv::gpu::magnitude(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||||
void cv::gpu::magnitude(const GpuMat&, const GpuMat&, GpuMat&, const Stream&) { throw_nogpu(); }
|
void cv::gpu::magnitudeSqr(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||||
void cv::gpu::magnitudeSqr(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
|
void cv::gpu::phase(const GpuMat&, const GpuMat&, GpuMat&, bool, Stream&) { throw_nogpu(); }
|
||||||
void cv::gpu::magnitudeSqr(const GpuMat&, const GpuMat&, GpuMat&, const Stream&) { throw_nogpu(); }
|
void cv::gpu::cartToPolar(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool, Stream&) { throw_nogpu(); }
|
||||||
void cv::gpu::phase(const GpuMat&, const GpuMat&, GpuMat&, bool) { throw_nogpu(); }
|
void cv::gpu::polarToCart(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool, Stream&) { throw_nogpu(); }
|
||||||
void cv::gpu::phase(const GpuMat&, const GpuMat&, GpuMat&, bool, const Stream&) { throw_nogpu(); }
|
|
||||||
void cv::gpu::cartToPolar(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool) { throw_nogpu(); }
|
|
||||||
void cv::gpu::cartToPolar(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool, const Stream&) { throw_nogpu(); }
|
|
||||||
void cv::gpu::polarToCart(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool) { throw_nogpu(); }
|
|
||||||
void cv::gpu::polarToCart(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool, const Stream&) { throw_nogpu(); }
|
|
||||||
|
|
||||||
#else /* !defined (HAVE_CUDA) */
|
#else /* !defined (HAVE_CUDA) */
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// transpose
|
// transpose
|
||||||
|
|
||||||
void cv::gpu::transpose(const GpuMat& src, GpuMat& dst)
|
void cv::gpu::transpose(const GpuMat& src, GpuMat& dst, Stream& s)
|
||||||
{
|
{
|
||||||
CV_Assert(src.elemSize() == 1 || src.elemSize() == 4 || src.elemSize() == 8);
|
CV_Assert(src.elemSize() == 1 || src.elemSize() == 4 || src.elemSize() == 8);
|
||||||
|
|
||||||
dst.create( src.cols, src.rows, src.type() );
|
dst.create( src.cols, src.rows, src.type() );
|
||||||
|
|
||||||
|
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||||
|
|
||||||
if (src.elemSize() == 1)
|
if (src.elemSize() == 1)
|
||||||
{
|
{
|
||||||
|
NppStreamHandler h(stream);
|
||||||
|
|
||||||
NppiSize sz;
|
NppiSize sz;
|
||||||
sz.width = src.cols;
|
sz.width = src.cols;
|
||||||
sz.height = src.rows;
|
sz.height = src.rows;
|
||||||
@@ -87,6 +86,8 @@ void cv::gpu::transpose(const GpuMat& src, GpuMat& dst)
|
|||||||
}
|
}
|
||||||
else if (src.elemSize() == 4)
|
else if (src.elemSize() == 4)
|
||||||
{
|
{
|
||||||
|
NppStStreamHandler h(stream);
|
||||||
|
|
||||||
NcvSize32u sz;
|
NcvSize32u sz;
|
||||||
sz.width = src.cols;
|
sz.width = src.cols;
|
||||||
sz.height = src.rows;
|
sz.height = src.rows;
|
||||||
@@ -96,6 +97,8 @@ void cv::gpu::transpose(const GpuMat& src, GpuMat& dst)
|
|||||||
}
|
}
|
||||||
else // if (src.elemSize() == 8)
|
else // if (src.elemSize() == 8)
|
||||||
{
|
{
|
||||||
|
NppStStreamHandler h(stream);
|
||||||
|
|
||||||
NcvSize32u sz;
|
NcvSize32u sz;
|
||||||
sz.width = src.cols;
|
sz.width = src.cols;
|
||||||
sz.height = src.rows;
|
sz.height = src.rows;
|
||||||
@@ -104,13 +107,14 @@ void cv::gpu::transpose(const GpuMat& src, GpuMat& dst)
|
|||||||
dst.ptr<Ncv64u>(), dst.step, sz) );
|
dst.ptr<Ncv64u>(), dst.step, sz) );
|
||||||
}
|
}
|
||||||
|
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
if (stream == 0)
|
||||||
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
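This hunk is the template for every wrapper in the commit: CUDA 4.0 deprecates cudaThreadSynchronize() in favor of cudaDeviceSynchronize(), and each wrapper now synchronizes only when the caller passed the default (null) stream, preserving the old blocking semantics while letting real streams run asynchronously. Condensed into one function, the shape of a rewritten body looks like the sketch below (lifted from the exp wrapper further down; nppSafeCall, cudaSafeCall, NppStreamHandler, and StreamAccessor are this module's internals):

    void sample_wrapper(const cv::gpu::GpuMat& src, cv::gpu::GpuMat& dst, cv::gpu::Stream& s)
    {
        CV_Assert(src.type() == CV_32FC1);
        dst.create(src.size(), src.type());

        NppiSize sz;
        sz.width  = src.cols;
        sz.height = src.rows;

        cudaStream_t stream = StreamAccessor::getStream(s);  // 0 for Stream::Null()
        NppStreamHandler h(stream);      // RAII: points NPP at this stream for the scope

        nppSafeCall( nppiExp_32f_C1R(src.ptr<Npp32f>(), src.step,
                                     dst.ptr<Npp32f>(), dst.step, sz) );

        if (stream == 0)                 // default stream: keep the old blocking behavior
            cudaSafeCall( cudaDeviceSynchronize() );
    }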
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// flip
|
// flip
|
||||||
|
|
||||||
void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode)
|
void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode, Stream& s)
|
||||||
{
|
{
|
||||||
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);
|
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);
|
||||||
|
|
||||||
@@ -120,6 +124,10 @@ void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode)
|
|||||||
sz.width = src.cols;
|
sz.width = src.cols;
|
||||||
sz.height = src.rows;
|
sz.height = src.rows;
|
||||||
|
|
||||||
|
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||||
|
|
||||||
|
NppStreamHandler h(stream);
|
||||||
|
|
||||||
if (src.type() == CV_8UC1)
|
if (src.type() == CV_8UC1)
|
||||||
{
|
{
|
||||||
nppSafeCall( nppiMirror_8u_C1R(src.ptr<Npp8u>(), src.step,
|
nppSafeCall( nppiMirror_8u_C1R(src.ptr<Npp8u>(), src.step,
|
||||||
@@ -133,13 +141,14 @@ void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode)
|
|||||||
(flipCode == 0 ? NPP_HORIZONTAL_AXIS : (flipCode > 0 ? NPP_VERTICAL_AXIS : NPP_BOTH_AXIS))) );
|
(flipCode == 0 ? NPP_HORIZONTAL_AXIS : (flipCode > 0 ? NPP_VERTICAL_AXIS : NPP_BOTH_AXIS))) );
|
||||||
}
|
}
|
||||||
|
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
if (stream == 0)
|
||||||
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// LUT
|
// LUT
|
||||||
|
|
||||||
void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst)
|
void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& s)
|
||||||
{
|
{
|
||||||
class LevelsInit
|
class LevelsInit
|
||||||
{
|
{
|
||||||
@@ -172,6 +181,10 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst)
|
|||||||
Mat nppLut;
|
Mat nppLut;
|
||||||
lut.convertTo(nppLut, CV_32S);
|
lut.convertTo(nppLut, CV_32S);
|
||||||
|
|
||||||
|
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||||
|
|
||||||
|
NppStreamHandler h(stream);
|
||||||
|
|
||||||
if (src.type() == CV_8UC1)
|
if (src.type() == CV_8UC1)
|
||||||
{
|
{
|
||||||
nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, nppLut.ptr<Npp32s>(), lvls.pLevels, 256) );
|
nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, nppLut.ptr<Npp32s>(), lvls.pLevels, 256) );
|
||||||
@@ -192,13 +205,14 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst)
|
|||||||
nppSafeCall( nppiLUT_Linear_8u_C3R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, pValues3, lvls.pLevels3, lvls.nValues3) );
|
nppSafeCall( nppiLUT_Linear_8u_C3R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, pValues3, lvls.pLevels3, lvls.nValues3) );
|
||||||
}
|
}
|
||||||
|
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
if (stream == 0)
|
||||||
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// exp
|
// exp
|
||||||
|
|
||||||
void cv::gpu::exp(const GpuMat& src, GpuMat& dst)
|
void cv::gpu::exp(const GpuMat& src, GpuMat& dst, Stream& s)
|
||||||
{
|
{
|
||||||
CV_Assert(src.type() == CV_32FC1);
|
CV_Assert(src.type() == CV_32FC1);
|
||||||
|
|
||||||
@@ -208,15 +222,20 @@ void cv::gpu::exp(const GpuMat& src, GpuMat& dst)
|
|||||||
sz.width = src.cols;
|
sz.width = src.cols;
|
||||||
sz.height = src.rows;
|
sz.height = src.rows;
|
||||||
|
|
||||||
|
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||||
|
|
||||||
|
NppStreamHandler h(stream);
|
||||||
|
|
||||||
nppSafeCall( nppiExp_32f_C1R(src.ptr<Npp32f>(), src.step, dst.ptr<Npp32f>(), dst.step, sz) );
|
nppSafeCall( nppiExp_32f_C1R(src.ptr<Npp32f>(), src.step, dst.ptr<Npp32f>(), dst.step, sz) );
|
||||||
|
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
if (stream == 0)
|
||||||
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
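From the caller's side the change is invisible until a stream is passed; a sketch chaining two element-wise operations on one stream, ordered by stream semantics:

    void exp_log_demo(const cv::gpu::GpuMat& src)    // src: CV_32FC1
    {
        cv::gpu::GpuMat tmp, dst;
        cv::gpu::Stream stream;
        cv::gpu::exp(src, tmp, stream);   // both calls enqueued back-to-back,
        cv::gpu::log(tmp, dst, stream);   // executed in stream order
        stream.waitForCompletion();
    }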
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// log
|
// log
|
||||||
|
|
||||||
void cv::gpu::log(const GpuMat& src, GpuMat& dst)
|
void cv::gpu::log(const GpuMat& src, GpuMat& dst, Stream& s)
|
||||||
{
|
{
|
||||||
CV_Assert(src.type() == CV_32FC1);
|
CV_Assert(src.type() == CV_32FC1);
|
||||||
|
|
||||||
@@ -226,9 +245,14 @@ void cv::gpu::log(const GpuMat& src, GpuMat& dst)
|
|||||||
sz.width = src.cols;
|
sz.width = src.cols;
|
||||||
sz.height = src.rows;
|
sz.height = src.rows;
|
||||||
|
|
||||||
|
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||||
|
|
||||||
|
NppStreamHandler h(stream);
|
||||||
|
|
||||||
nppSafeCall( nppiLn_32f_C1R(src.ptr<Npp32f>(), src.step, dst.ptr<Npp32f>(), dst.step, sz) );
|
nppSafeCall( nppiLn_32f_C1R(src.ptr<Npp32f>(), src.step, dst.ptr<Npp32f>(), dst.step, sz) );
|
||||||
|
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
if (stream == 0)
|
||||||
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
@@ -238,7 +262,7 @@ namespace
|
|||||||
{
|
{
|
||||||
typedef NppStatus (*nppMagnitude_t)(const Npp32fc* pSrc, int nSrcStep, Npp32f* pDst, int nDstStep, NppiSize oSizeROI);
|
typedef NppStatus (*nppMagnitude_t)(const Npp32fc* pSrc, int nSrcStep, Npp32f* pDst, int nDstStep, NppiSize oSizeROI);
|
||||||
|
|
||||||
inline void npp_magnitude(const GpuMat& src, GpuMat& dst, nppMagnitude_t func)
|
inline void npp_magnitude(const GpuMat& src, GpuMat& dst, nppMagnitude_t func, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
CV_Assert(src.type() == CV_32FC2);
|
CV_Assert(src.type() == CV_32FC2);
|
||||||
|
|
||||||
@@ -248,20 +272,23 @@ namespace
|
|||||||
sz.width = src.cols;
|
sz.width = src.cols;
|
||||||
sz.height = src.rows;
|
sz.height = src.rows;
|
||||||
|
|
||||||
|
NppStreamHandler h(stream);
|
||||||
|
|
||||||
nppSafeCall( func(src.ptr<Npp32fc>(), src.step, dst.ptr<Npp32f>(), dst.step, sz) );
|
nppSafeCall( func(src.ptr<Npp32fc>(), src.step, dst.ptr<Npp32f>(), dst.step, sz) );
|
||||||
|
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
if (stream == 0)
|
||||||
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::gpu::magnitude(const GpuMat& src, GpuMat& dst)
|
void cv::gpu::magnitude(const GpuMat& src, GpuMat& dst, Stream& stream)
|
||||||
{
|
{
|
||||||
::npp_magnitude(src, dst, nppiMagnitude_32fc32f_C1R);
|
::npp_magnitude(src, dst, nppiMagnitude_32fc32f_C1R, StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::gpu::magnitudeSqr(const GpuMat& src, GpuMat& dst)
|
void cv::gpu::magnitudeSqr(const GpuMat& src, GpuMat& dst, Stream& stream)
|
||||||
{
|
{
|
||||||
::npp_magnitude(src, dst, nppiMagnitudeSqr_32fc32f_C1R);
|
::npp_magnitude(src, dst, nppiMagnitudeSqr_32fc32f_C1R, StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
@@ -310,52 +337,27 @@ namespace
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::gpu::magnitude(const GpuMat& x, const GpuMat& y, GpuMat& dst)
|
void cv::gpu::magnitude(const GpuMat& x, const GpuMat& y, GpuMat& dst, Stream& stream)
|
||||||
{
|
|
||||||
::cartToPolar_caller(x, y, &dst, false, 0, false, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
void cv::gpu::magnitude(const GpuMat& x, const GpuMat& y, GpuMat& dst, const Stream& stream)
|
|
||||||
{
|
{
|
||||||
::cartToPolar_caller(x, y, &dst, false, 0, false, StreamAccessor::getStream(stream));
|
::cartToPolar_caller(x, y, &dst, false, 0, false, StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::gpu::magnitudeSqr(const GpuMat& x, const GpuMat& y, GpuMat& dst)
|
void cv::gpu::magnitudeSqr(const GpuMat& x, const GpuMat& y, GpuMat& dst, Stream& stream)
|
||||||
{
|
|
||||||
::cartToPolar_caller(x, y, &dst, true, 0, false, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
void cv::gpu::magnitudeSqr(const GpuMat& x, const GpuMat& y, GpuMat& dst, const Stream& stream)
|
|
||||||
{
|
{
|
||||||
::cartToPolar_caller(x, y, &dst, true, 0, false, StreamAccessor::getStream(stream));
|
::cartToPolar_caller(x, y, &dst, true, 0, false, StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::gpu::phase(const GpuMat& x, const GpuMat& y, GpuMat& angle, bool angleInDegrees)
|
void cv::gpu::phase(const GpuMat& x, const GpuMat& y, GpuMat& angle, bool angleInDegrees, Stream& stream)
|
||||||
{
|
|
||||||
::cartToPolar_caller(x, y, 0, false, &angle, angleInDegrees, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
void cv::gpu::phase(const GpuMat& x, const GpuMat& y, GpuMat& angle, bool angleInDegrees, const Stream& stream)
|
|
||||||
{
|
{
|
||||||
::cartToPolar_caller(x, y, 0, false, &angle, angleInDegrees, StreamAccessor::getStream(stream));
|
::cartToPolar_caller(x, y, 0, false, &angle, angleInDegrees, StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::gpu::cartToPolar(const GpuMat& x, const GpuMat& y, GpuMat& mag, GpuMat& angle, bool angleInDegrees)
|
void cv::gpu::cartToPolar(const GpuMat& x, const GpuMat& y, GpuMat& mag, GpuMat& angle, bool angleInDegrees, Stream& stream)
|
||||||
{
|
|
||||||
::cartToPolar_caller(x, y, &mag, false, &angle, angleInDegrees, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
void cv::gpu::cartToPolar(const GpuMat& x, const GpuMat& y, GpuMat& mag, GpuMat& angle, bool angleInDegrees, const Stream& stream)
|
|
||||||
{
|
{
|
||||||
::cartToPolar_caller(x, y, &mag, false, &angle, angleInDegrees, StreamAccessor::getStream(stream));
|
::cartToPolar_caller(x, y, &mag, false, &angle, angleInDegrees, StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::gpu::polarToCart(const GpuMat& magnitude, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees)
|
void cv::gpu::polarToCart(const GpuMat& magnitude, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees, Stream& stream)
|
||||||
{
|
|
||||||
::polarToCart_caller(magnitude, angle, x, y, angleInDegrees, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
void cv::gpu::polarToCart(const GpuMat& magnitude, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees, const Stream& stream)
|
|
||||||
{
|
{
|
||||||
::polarToCart_caller(magnitude, angle, x, y, angleInDegrees, StreamAccessor::getStream(stream));
|
::polarToCart_caller(magnitude, angle, x, y, angleInDegrees, StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
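The consolidated signatures make round-tripping through polar coordinates a straight chain; a sketch (angles in degrees, as an example):

    void polar_demo(const cv::gpu::GpuMat& x, const cv::gpu::GpuMat& y)   // CV_32FC1 each
    {
        cv::gpu::GpuMat mag, angle, x2, y2;
        cv::gpu::Stream stream;
        cv::gpu::cartToPolar(x, y, mag, angle, true, stream);    // to magnitude/angle
        cv::gpu::polarToCart(mag, angle, x2, y2, true, stream);  // and back
        stream.waitForCompletion();
    }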
|
|||||||
@@ -51,7 +51,6 @@ using namespace std;
|
|||||||
cv::gpu::DisparityBilateralFilter::DisparityBilateralFilter(int, int, int) { throw_nogpu(); }
|
cv::gpu::DisparityBilateralFilter::DisparityBilateralFilter(int, int, int) { throw_nogpu(); }
|
||||||
cv::gpu::DisparityBilateralFilter::DisparityBilateralFilter(int, int, int, float, float, float) { throw_nogpu(); }
|
cv::gpu::DisparityBilateralFilter::DisparityBilateralFilter(int, int, int, float, float, float) { throw_nogpu(); }
|
||||||
|
|
||||||
void cv::gpu::DisparityBilateralFilter::operator()(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
|
|
||||||
void cv::gpu::DisparityBilateralFilter::operator()(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
void cv::gpu::DisparityBilateralFilter::operator()(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||||
|
|
||||||
#else /* !defined (HAVE_CUDA) */
|
#else /* !defined (HAVE_CUDA) */
|
||||||
@@ -101,7 +100,7 @@ namespace
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
void bilateral_filter_operator(int ndisp, int radius, int iters, float edge_threshold,float max_disc_threshold,
|
void bilateral_filter_operator(int ndisp, int radius, int iters, float edge_threshold,float max_disc_threshold,
|
||||||
GpuMat& table_color, GpuMat& table_space,
|
GpuMat& table_color, GpuMat& table_space,
|
||||||
const GpuMat& disp, const GpuMat& img, GpuMat& dst, cudaStream_t stream)
|
const GpuMat& disp, const GpuMat& img, GpuMat& dst, Stream& stream)
|
||||||
{
|
{
|
||||||
short edge_disc = max<short>(short(1), short(ndisp * edge_threshold + 0.5));
|
short edge_disc = max<short>(short(1), short(ndisp * edge_threshold + 0.5));
|
||||||
short max_disc = short(ndisp * max_disc_threshold + 0.5);
|
short max_disc = short(ndisp * max_disc_threshold + 0.5);
|
||||||
@@ -109,14 +108,19 @@ namespace
|
|||||||
bf::load_constants(table_color.ptr<float>(), table_space, ndisp, radius, edge_disc, max_disc);
|
bf::load_constants(table_color.ptr<float>(), table_space, ndisp, radius, edge_disc, max_disc);
|
||||||
|
|
||||||
if (&dst != &disp)
|
if (&dst != &disp)
|
||||||
disp.copyTo(dst);
|
{
|
||||||
|
if (stream)
|
||||||
|
stream.enqueueCopy(disp, dst);
|
||||||
|
else
|
||||||
|
disp.copyTo(dst);
|
||||||
|
}
|
||||||
|
|
||||||
bf::bilateral_filter_gpu((DevMem2D_<T>)dst, img, img.channels(), iters, stream);
|
bf::bilateral_filter_gpu((DevMem2D_<T>)dst, img, img.channels(), iters, StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
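The copy branch above generalizes: on a real stream, device-to-device copies must be enqueued rather than issued synchronously, and Stream's boolean conversion (used above) distinguishes the two cases. As a standalone sketch:

    void copy_on_stream(const cv::gpu::GpuMat& src, cv::gpu::GpuMat& dst, cv::gpu::Stream& stream)
    {
        if (stream)                        // real stream: enqueue the copy
            stream.enqueueCopy(src, dst);  // async device-to-device copy
        else
            src.copyTo(dst);               // blocking copy on the default stream
    }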
|
|
||||||
typedef void (*bilateral_filter_operator_t)(int ndisp, int radius, int iters, float edge_threshold, float max_disc_threshold,
|
typedef void (*bilateral_filter_operator_t)(int ndisp, int radius, int iters, float edge_threshold, float max_disc_threshold,
|
||||||
GpuMat& table_color, GpuMat& table_space,
|
GpuMat& table_color, GpuMat& table_space,
|
||||||
const GpuMat& disp, const GpuMat& img, GpuMat& dst, cudaStream_t stream);
|
const GpuMat& disp, const GpuMat& img, GpuMat& dst, Stream& stream);
|
||||||
|
|
||||||
const bilateral_filter_operator_t operators[] =
|
const bilateral_filter_operator_t operators[] =
|
||||||
{bilateral_filter_operator<unsigned char>, 0, 0, bilateral_filter_operator<short>, 0, 0, 0, 0};
|
{bilateral_filter_operator<unsigned char>, 0, 0, bilateral_filter_operator<short>, 0, 0, 0, 0};
|
||||||
@@ -139,18 +143,11 @@ cv::gpu::DisparityBilateralFilter::DisparityBilateralFilter(int ndisp_, int radi
|
|||||||
calc_space_weighted_filter(table_space, radius * 2 + 1, radius + 1.0f);
|
calc_space_weighted_filter(table_space, radius * 2 + 1, radius + 1.0f);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::gpu::DisparityBilateralFilter::operator()(const GpuMat& disp, const GpuMat& img, GpuMat& dst)
|
|
||||||
{
|
|
||||||
CV_DbgAssert(0 < ndisp && 0 < radius && 0 < iters);
|
|
||||||
CV_Assert(disp.rows == img.rows && disp.cols == img.cols && (disp.type() == CV_8U || disp.type() == CV_16S) && (img.type() == CV_8UC1 || img.type() == CV_8UC3));
|
|
||||||
operators[disp.type()](ndisp, radius, iters, edge_threshold, max_disc_threshold, table_color, table_space, disp, img, dst, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
void cv::gpu::DisparityBilateralFilter::operator()(const GpuMat& disp, const GpuMat& img, GpuMat& dst, Stream& stream)
|
void cv::gpu::DisparityBilateralFilter::operator()(const GpuMat& disp, const GpuMat& img, GpuMat& dst, Stream& stream)
|
||||||
{
|
{
|
||||||
CV_DbgAssert(0 < ndisp && 0 < radius && 0 < iters);
|
CV_DbgAssert(0 < ndisp && 0 < radius && 0 < iters);
|
||||||
CV_Assert(disp.rows == img.rows && disp.cols == img.cols && (disp.type() == CV_8U || disp.type() == CV_16S) && (img.type() == CV_8UC1 || img.type() == CV_8UC3));
|
CV_Assert(disp.rows == img.rows && disp.cols == img.cols && (disp.type() == CV_8U || disp.type() == CV_16S) && (img.type() == CV_8UC1 || img.type() == CV_8UC3));
|
||||||
operators[disp.type()](ndisp, radius, iters, edge_threshold, max_disc_threshold, table_color, table_space, disp, img, dst, StreamAccessor::getStream(stream));
|
operators[disp.type()](ndisp, radius, iters, edge_threshold, max_disc_threshold, table_color, table_space, disp, img, dst, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* !defined (HAVE_CUDA) */
|
#endif /* !defined (HAVE_CUDA) */
|
||||||
|
|||||||
@@ -48,8 +48,7 @@ using namespace cv::gpu;
|
|||||||
|
|
||||||
#if !defined (HAVE_CUDA)
|
#if !defined (HAVE_CUDA)
|
||||||
|
|
||||||
void cv::gpu::blendLinear(const GpuMat&, const GpuMat&, const GpuMat&, const GpuMat&,
|
void cv::gpu::blendLinear(const GpuMat&, const GpuMat&, const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||||
GpuMat&) { throw_nogpu(); }
|
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
@@ -57,14 +56,14 @@ namespace cv { namespace gpu
|
|||||||
{
|
{
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void blendLinearCaller(int rows, int cols, int cn, const PtrStep_<T> img1, const PtrStep_<T> img2,
|
void blendLinearCaller(int rows, int cols, int cn, const PtrStep_<T> img1, const PtrStep_<T> img2,
|
||||||
const PtrStep_<float> weights1, const PtrStep_<float> weights2, PtrStep_<T> result);
|
const PtrStep_<float> weights1, const PtrStep_<float> weights2, PtrStep_<T> result, cudaStream_t stream);
|
||||||
|
|
||||||
void blendLinearCaller8UC4(int rows, int cols, const PtrStep img1, const PtrStep img2,
|
void blendLinearCaller8UC4(int rows, int cols, const PtrStep img1, const PtrStep img2,
|
||||||
const PtrStepf weights1, const PtrStepf weights2, PtrStep result);
|
const PtrStepf weights1, const PtrStepf weights2, PtrStep result, cudaStream_t stream);
|
||||||
}}
|
}}
|
||||||
|
|
||||||
void cv::gpu::blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2,
|
void cv::gpu::blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2,
|
||||||
GpuMat& result)
|
GpuMat& result, Stream& stream)
|
||||||
{
|
{
|
||||||
CV_Assert(img1.size() == img2.size());
|
CV_Assert(img1.size() == img2.size());
|
||||||
CV_Assert(img1.type() == img2.type());
|
CV_Assert(img1.type() == img2.type());
|
||||||
@@ -83,12 +82,12 @@ void cv::gpu::blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat&
|
|||||||
{
|
{
|
||||||
case CV_8U:
|
case CV_8U:
|
||||||
if (cn != 4)
|
if (cn != 4)
|
||||||
blendLinearCaller<uchar>(size.height, size.width, cn, img1, img2, weights1, weights2, result);
|
blendLinearCaller<uchar>(size.height, size.width, cn, img1, img2, weights1, weights2, result, StreamAccessor::getStream(stream));
|
||||||
else
|
else
|
||||||
blendLinearCaller8UC4(size.height, size.width, img1, img2, weights1, weights2, result);
|
blendLinearCaller8UC4(size.height, size.width, img1, img2, weights1, weights2, result, StreamAccessor::getStream(stream));
|
||||||
break;
|
break;
|
||||||
case CV_32F:
|
case CV_32F:
|
||||||
blendLinearCaller<float>(size.height, size.width, cn, img1, img2, weights1, weights2, result);
|
blendLinearCaller<float>(size.height, size.width, cn, img1, img2, weights1, weights2, result, StreamAccessor::getStream(stream));
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
CV_Error(CV_StsUnsupportedFormat, "bad image depth in linear blending function");
|
CV_Error(CV_StsUnsupportedFormat, "bad image depth in linear blending function");
|
||||||
|
|||||||
@@ -54,18 +54,18 @@ const vector<GpuMat>& cv::gpu::BruteForceMatcher_GPU_base::getTrainDescriptors()
|
|||||||
void cv::gpu::BruteForceMatcher_GPU_base::clear() { throw_nogpu(); }
|
void cv::gpu::BruteForceMatcher_GPU_base::clear() { throw_nogpu(); }
|
||||||
bool cv::gpu::BruteForceMatcher_GPU_base::empty() const { throw_nogpu(); return true; }
|
bool cv::gpu::BruteForceMatcher_GPU_base::empty() const { throw_nogpu(); return true; }
|
||||||
bool cv::gpu::BruteForceMatcher_GPU_base::isMaskSupported() const { throw_nogpu(); return true; }
|
bool cv::gpu::BruteForceMatcher_GPU_base::isMaskSupported() const { throw_nogpu(); return true; }
|
||||||
void cv::gpu::BruteForceMatcher_GPU_base::matchSingle(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, const GpuMat&) { throw_nogpu(); }
|
void cv::gpu::BruteForceMatcher_GPU_base::matchSingle(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_nogpu(); }
|
||||||
void cv::gpu::BruteForceMatcher_GPU_base::matchDownload(const GpuMat&, const GpuMat&, vector<DMatch>&) { throw_nogpu(); }
|
void cv::gpu::BruteForceMatcher_GPU_base::matchDownload(const GpuMat&, const GpuMat&, vector<DMatch>&) { throw_nogpu(); }
|
||||||
void cv::gpu::BruteForceMatcher_GPU_base::match(const GpuMat&, const GpuMat&, vector<DMatch>&, const GpuMat&) { throw_nogpu(); }
|
void cv::gpu::BruteForceMatcher_GPU_base::match(const GpuMat&, const GpuMat&, vector<DMatch>&, const GpuMat&) { throw_nogpu(); }
|
||||||
void cv::gpu::BruteForceMatcher_GPU_base::makeGpuCollection(GpuMat&, GpuMat&, const vector<GpuMat>&) { throw_nogpu(); }
|
void cv::gpu::BruteForceMatcher_GPU_base::makeGpuCollection(GpuMat&, GpuMat&, const vector<GpuMat>&) { throw_nogpu(); }
|
||||||
void cv::gpu::BruteForceMatcher_GPU_base::matchCollection(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, const GpuMat&) { throw_nogpu(); }
|
void cv::gpu::BruteForceMatcher_GPU_base::matchCollection(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_nogpu(); }
|
||||||
void cv::gpu::BruteForceMatcher_GPU_base::matchDownload(const GpuMat&, const GpuMat&, const GpuMat&, std::vector<DMatch>&) { throw_nogpu(); }
|
void cv::gpu::BruteForceMatcher_GPU_base::matchDownload(const GpuMat&, const GpuMat&, const GpuMat&, std::vector<DMatch>&) { throw_nogpu(); }
|
||||||
void cv::gpu::BruteForceMatcher_GPU_base::match(const GpuMat&, std::vector<DMatch>&, const std::vector<GpuMat>&) { throw_nogpu(); }
|
void cv::gpu::BruteForceMatcher_GPU_base::match(const GpuMat&, std::vector<DMatch>&, const std::vector<GpuMat>&) { throw_nogpu(); }
|
||||||
void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, int, const GpuMat&) { throw_nogpu(); }
|
void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, int, const GpuMat&, Stream&) { throw_nogpu(); }
|
||||||
void cv::gpu::BruteForceMatcher_GPU_base::knnMatchDownload(const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, bool) { throw_nogpu(); }
|
void cv::gpu::BruteForceMatcher_GPU_base::knnMatchDownload(const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, bool) { throw_nogpu(); }
|
||||||
void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, int, const GpuMat&, bool) { throw_nogpu(); }
|
void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, int, const GpuMat&, bool) { throw_nogpu(); }
|
||||||
void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat&, std::vector< std::vector<DMatch> >&, int, const std::vector<GpuMat>&, bool) { throw_nogpu(); }
|
void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat&, std::vector< std::vector<DMatch> >&, int, const std::vector<GpuMat>&, bool) { throw_nogpu(); }
|
||||||
void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, float, const GpuMat&) { throw_nogpu(); }
|
void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, float, const GpuMat&, Stream&) { throw_nogpu(); }
|
||||||
void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchDownload(const GpuMat&, const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, bool) { throw_nogpu(); }
|
void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchDownload(const GpuMat&, const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, bool) { throw_nogpu(); }
|
||||||
void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, float, const GpuMat&, bool) { throw_nogpu(); }
|
void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, float, const GpuMat&, bool) { throw_nogpu(); }
|
||||||
void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat&, std::vector< std::vector<DMatch> >&, float, const std::vector<GpuMat>&, bool) { throw_nogpu(); }
|
void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat&, std::vector< std::vector<DMatch> >&, float, const std::vector<GpuMat>&, bool) { throw_nogpu(); }
|
||||||
@@ -77,47 +77,47 @@ namespace cv { namespace gpu { namespace bfmatcher
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
void matchSingleL1_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs,
|
void matchSingleL1_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs,
|
||||||
const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
|
const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
|
||||||
bool cc_12);
|
bool cc_12, cudaStream_t stream);
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void matchSingleL2_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs,
|
void matchSingleL2_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs,
|
||||||
const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
|
const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
|
||||||
bool cc_12);
|
bool cc_12, cudaStream_t stream);
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void matchSingleHamming_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs,
|
void matchSingleHamming_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs,
|
||||||
        const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
-        bool cc_12);
+        bool cc_12, cudaStream_t stream);
    template <typename T>
    void matchCollectionL1_gpu(const DevMem2D& queryDescs, const DevMem2D& trainCollection,
        const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
-        bool cc_12);
+        bool cc_12, cudaStream_t stream);
    template <typename T>
    void matchCollectionL2_gpu(const DevMem2D& queryDescs, const DevMem2D& trainCollection,
        const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
-        bool cc_12);
+        bool cc_12, cudaStream_t stream);
    template <typename T>
    void matchCollectionHamming_gpu(const DevMem2D& queryDescs, const DevMem2D& trainCollection,
        const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
-        bool cc_12);
+        bool cc_12, cudaStream_t stream);

    template <typename T>
    void knnMatchL1_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn,
-        const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
+        const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);
    template <typename T>
    void knnMatchL2_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn,
-        const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
+        const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);
    template <typename T>
    void knnMatchHamming_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn,
-        const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
+        const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);

    template <typename T>
    void radiusMatchL1_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance,
-        const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
+        const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance, cudaStream_t stream);
    template <typename T>
    void radiusMatchL2_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance,
-        const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
+        const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance, cudaStream_t stream);
    template <typename T>
    void radiusMatchHamming_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance,
-        const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
+        const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance, cudaStream_t stream);
}}}

namespace
@@ -168,7 +168,7 @@ bool cv::gpu::BruteForceMatcher_GPU_base::isMaskSupported() const
// Match

void cv::gpu::BruteForceMatcher_GPU_base::matchSingle(const GpuMat& queryDescs, const GpuMat& trainDescs,
-    GpuMat& trainIdx, GpuMat& distance, const GpuMat& mask)
+    GpuMat& trainIdx, GpuMat& distance, const GpuMat& mask, Stream& stream)
{
    if (queryDescs.empty() || trainDescs.empty())
        return;
@@ -177,7 +177,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchSingle(const GpuMat& queryDescs,

    typedef void (*match_caller_t)(const DevMem2D& queryDescs, const DevMem2D& trainDescs,
        const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
-        bool cc_12);
+        bool cc_12, cudaStream_t stream);

    static const match_caller_t match_callers[3][8] =
    {
@@ -213,7 +213,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchSingle(const GpuMat& queryDescs,

    // For single train there is no need to save imgIdx, so we just save imgIdx to trainIdx.
    // trainIdx is stored after imgIdx, so we don't lose its value.
-    func(queryDescs, trainDescs, mask, trainIdx, trainIdx, distance, cc_12);
+    func(queryDescs, trainDescs, mask, trainIdx, trainIdx, distance, cc_12, StreamAccessor::getStream(stream));
}

void cv::gpu::BruteForceMatcher_GPU_base::matchDownload(const GpuMat& trainIdx, const GpuMat& distance,
@@ -301,7 +301,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::makeGpuCollection(GpuMat& trainCollect
}

void cv::gpu::BruteForceMatcher_GPU_base::matchCollection(const GpuMat& queryDescs, const GpuMat& trainCollection,
-    GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, const GpuMat& maskCollection)
+    GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, const GpuMat& maskCollection, Stream& stream)
{
    if (queryDescs.empty() || trainCollection.empty())
        return;
@@ -310,7 +310,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchCollection(const GpuMat& queryDes

    typedef void (*match_caller_t)(const DevMem2D& queryDescs, const DevMem2D& trainCollection,
        const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx,
-        const DevMem2Df& distance, bool cc_12);
+        const DevMem2Df& distance, bool cc_12, cudaStream_t stream);

    static const match_caller_t match_callers[3][8] =
    {
@@ -344,7 +344,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchCollection(const GpuMat& queryDes

    bool cc_12 = TargetArchs::builtWith(FEATURE_SET_COMPUTE_12) && DeviceInfo().supports(FEATURE_SET_COMPUTE_12);

-    func(queryDescs, trainCollection, maskCollection, trainIdx, imgIdx, distance, cc_12);
+    func(queryDescs, trainCollection, maskCollection, trainIdx, imgIdx, distance, cc_12, StreamAccessor::getStream(stream));
}

void cv::gpu::BruteForceMatcher_GPU_base::matchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx,
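Note: each host wrapper above now accepts a cv::gpu::Stream& and unwraps it to a raw cudaStream_t via StreamAccessor::getStream at the boundary to device code. A hedged sketch of that layering (the wrapper and caller names are hypothetical, and the header path is an assumption):

    #include <opencv2/gpu/gpu.hpp>
    #include <opencv2/gpu/stream_accessor.hpp>  // assumed location of StreamAccessor

    using namespace cv::gpu;

    // Hypothetical low-level entry point: raw cudaStream_t only, as in the patch.
    void myCaller(const DevMem2D& src, cudaStream_t stream);

    // Public wrapper: cv::gpu::Stream in the API, unwrapped at the boundary.
    // A null Stream unwraps to stream 0, so the same path serves both
    // synchronous and asynchronous callers.
    void myOperation(const GpuMat& src, Stream& stream)
    {
        myCaller(src, StreamAccessor::getStream(stream));
    }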
@@ -403,7 +403,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::match(const GpuMat& queryDescs, vector
// KnnMatch

void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat& queryDescs, const GpuMat& trainDescs,
-    GpuMat& trainIdx, GpuMat& distance, GpuMat& allDist, int k, const GpuMat& mask)
+    GpuMat& trainIdx, GpuMat& distance, GpuMat& allDist, int k, const GpuMat& mask, Stream& stream)
{
    if (queryDescs.empty() || trainDescs.empty())
        return;
@@ -411,7 +411,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat& queryDescs, con
    using namespace cv::gpu::bfmatcher;

    typedef void (*match_caller_t)(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn,
-        const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
+        const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);

    static const match_caller_t match_callers[3][8] =
    {
@@ -436,16 +436,24 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat& queryDescs, con
    const int nTrain = trainDescs.rows;

    trainIdx.create(nQuery, k, CV_32S);
-    trainIdx.setTo(Scalar::all(-1));
    distance.create(nQuery, k, CV_32F);

    ensureSizeIsEnough(nQuery, nTrain, CV_32FC1, allDist);
-    allDist.setTo(Scalar::all(numeric_limits<float>::max()));
+    if (stream)
+    {
+        stream.enqueueMemSet(trainIdx, Scalar::all(-1));
+        stream.enqueueMemSet(allDist, Scalar::all(numeric_limits<float>::max()));
+    }
+    else
+    {
+        trainIdx.setTo(Scalar::all(-1));
+        allDist.setTo(Scalar::all(numeric_limits<float>::max()));
+    }

    match_caller_t func = match_callers[distType][queryDescs.depth()];
    CV_Assert(func != 0);

-    func(queryDescs, trainDescs, k, mask, trainIdx, distance, allDist);
+    func(queryDescs, trainDescs, k, mask, trainIdx, distance, allDist, StreamAccessor::getStream(stream));
}

void cv::gpu::BruteForceMatcher_GPU_base::knnMatchDownload(const GpuMat& trainIdx, const GpuMat& distance,
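Note: the hunk above is the general pattern this commit applies to output-buffer initialization: when the caller passes a real stream, the fills are enqueued on it, and only the null-stream path blocks the host. A minimal sketch of that branch in isolation (sizes are placeholders):

    #include <opencv2/gpu/gpu.hpp>

    using namespace cv;
    using namespace cv::gpu;

    // Fill a freshly created result buffer either asynchronously or
    // synchronously, mirroring the knnMatch hunk above.
    void initTrainIdx(GpuMat& trainIdx, Stream& stream)
    {
        trainIdx.create(100, 2, CV_32S);

        if (stream)                                          // non-null stream: stay asynchronous
            stream.enqueueMemSet(trainIdx, Scalar::all(-1)); // enqueued fill, host does not wait
        else
            trainIdx.setTo(Scalar::all(-1));                 // blocking fill on the default stream
    }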
@@ -547,7 +555,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat& queryDescs,
// RadiusMatch

void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat& queryDescs, const GpuMat& trainDescs,
-    GpuMat& trainIdx, GpuMat& nMatches, GpuMat& distance, float maxDistance, const GpuMat& mask)
+    GpuMat& trainIdx, GpuMat& nMatches, GpuMat& distance, float maxDistance, const GpuMat& mask, Stream& stream)
{
    if (queryDescs.empty() || trainDescs.empty())
        return;
@@ -555,7 +563,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat& queryDescs,
    using namespace cv::gpu::bfmatcher;

    typedef void (*radiusMatch_caller_t)(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance,
-        const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
+        const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance, cudaStream_t stream);

    static const radiusMatch_caller_t radiusMatch_callers[3][8] =
    {
@@ -583,17 +591,21 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat& queryDescs,
    CV_Assert(trainIdx.empty() || (trainIdx.rows == nQuery && trainIdx.size() == distance.size()));

    ensureSizeIsEnough(1, nQuery, CV_32SC1, nMatches);
-    nMatches.setTo(Scalar::all(0));
    if (trainIdx.empty())
    {
        trainIdx.create(nQuery, nTrain, CV_32SC1);
        distance.create(nQuery, nTrain, CV_32FC1);
    }

+    if (stream)
+        stream.enqueueMemSet(nMatches, Scalar::all(0));
+    else
+        nMatches.setTo(Scalar::all(0));

    radiusMatch_caller_t func = radiusMatch_callers[distType][queryDescs.depth()];
    CV_Assert(func != 0);

-    func(queryDescs, trainDescs, maxDistance, mask, trainIdx, nMatches.ptr<unsigned int>(), distance);
+    func(queryDescs, trainDescs, maxDistance, mask, trainIdx, nMatches.ptr<unsigned int>(), distance, StreamAccessor::getStream(stream));
}

void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& nMatches,
@@ -44,20 +44,11 @@

#if !defined(HAVE_CUDA)

-void cv::gpu::transformPoints(const GpuMat&, const Mat&, const Mat&,
-                              GpuMat&) { throw_nogpu(); }
-
-void cv::gpu::transformPoints(const GpuMat&, const Mat&, const Mat&,
-                              GpuMat&, const Stream&) { throw_nogpu(); }
-
-void cv::gpu::projectPoints(const GpuMat&, const Mat&, const Mat&,
-                            const Mat&, const Mat&, GpuMat&) { throw_nogpu(); }
-
-void cv::gpu::projectPoints(const GpuMat&, const Mat&, const Mat&,
-                            const Mat&, const Mat&, GpuMat&, const Stream&) { throw_nogpu(); }
-
-void cv::gpu::solvePnPRansac(const Mat&, const Mat&, const Mat&, const Mat&,
-                             Mat&, Mat&, bool, int, float, int, vector<int>*) { throw_nogpu(); }
+void cv::gpu::transformPoints(const GpuMat&, const Mat&, const Mat&, GpuMat&, Stream&) { throw_nogpu(); }
+
+void cv::gpu::projectPoints(const GpuMat&, const Mat&, const Mat&, const Mat&, const Mat&, GpuMat&, Stream&) { throw_nogpu(); }
+
+void cv::gpu::solvePnPRansac(const Mat&, const Mat&, const Mat&, const Mat&, Mat&, Mat&, bool, int, float, int, vector<int>*) { throw_nogpu(); }

#else

@@ -66,14 +57,12 @@ using namespace cv::gpu;

namespace cv { namespace gpu { namespace transform_points
{
-    void call(const DevMem2D_<float3> src, const float* rot, const float* transl,
-              DevMem2D_<float3> dst, cudaStream_t stream);
+    void call(const DevMem2D_<float3> src, const float* rot, const float* transl, DevMem2D_<float3> dst, cudaStream_t stream);
}}}

namespace
{
-    void transformPointsCaller(const GpuMat& src, const Mat& rvec, const Mat& tvec,
-                               GpuMat& dst, cudaStream_t stream)
+    void transformPointsCaller(const GpuMat& src, const Mat& rvec, const Mat& tvec, GpuMat& dst, cudaStream_t stream)
    {
        CV_Assert(src.rows == 1 && src.cols > 0 && src.type() == CV_32FC3);
        CV_Assert(rvec.size() == Size(3, 1) && rvec.type() == CV_32F);
@@ -88,30 +77,20 @@ namespace
    }
}

-void cv::gpu::transformPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec,
-                              GpuMat& dst)
-{
-    ::transformPointsCaller(src, rvec, tvec, dst, 0);
-}
-
-void cv::gpu::transformPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec,
-                              GpuMat& dst, const Stream& stream)
+void cv::gpu::transformPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec, GpuMat& dst, Stream& stream)
{
    ::transformPointsCaller(src, rvec, tvec, dst, StreamAccessor::getStream(stream));
}

namespace cv { namespace gpu { namespace project_points
{
-    void call(const DevMem2D_<float3> src, const float* rot, const float* transl,
-              const float* proj, DevMem2D_<float2> dst, cudaStream_t stream);
+    void call(const DevMem2D_<float3> src, const float* rot, const float* transl, const float* proj, DevMem2D_<float2> dst, cudaStream_t stream);
}}}


namespace
{
-    void projectPointsCaller(const GpuMat& src, const Mat& rvec, const Mat& tvec,
-                             const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst,
-                             cudaStream_t stream)
+    void projectPointsCaller(const GpuMat& src, const Mat& rvec, const Mat& tvec, const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst, cudaStream_t stream)
    {
        CV_Assert(src.rows == 1 && src.cols > 0 && src.type() == CV_32FC3);
        CV_Assert(rvec.size() == Size(3, 1) && rvec.type() == CV_32F);
@@ -124,20 +103,11 @@ namespace
        Rodrigues(rvec, rot);

        dst.create(src.size(), CV_32FC2);
-        project_points::call(src, rot.ptr<float>(), tvec.ptr<float>(),
-                             camera_mat.ptr<float>(), dst, stream);
+        project_points::call(src, rot.ptr<float>(), tvec.ptr<float>(), camera_mat.ptr<float>(), dst, stream);
    }
}

-void cv::gpu::projectPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec,
-                            const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst)
-{
-    ::projectPointsCaller(src, rvec, tvec, camera_mat, dist_coef, dst, 0);
-}
-
-void cv::gpu::projectPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec,
-                            const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst,
-                            const Stream& stream)
+void cv::gpu::projectPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec, const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst, Stream& stream)
{
    ::projectPointsCaller(src, rvec, tvec, camera_mat, dist_coef, dst, StreamAccessor::getStream(stream));
}
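Note: with the duplicate synchronous/asynchronous overloads collapsed, there is one stream-aware entry point per operation. A usage sketch (assuming the public header supplies a default Stream argument, e.g. Stream::Null(), for synchronous callers):

    #include <opencv2/gpu/gpu.hpp>

    using namespace cv;
    using namespace cv::gpu;

    int main()
    {
        GpuMat src(1, 1000, CV_32FC3), dst;
        Mat rvec = Mat::zeros(1, 3, CV_32F);   // satisfies rvec.size() == Size(3, 1)
        Mat tvec = Mat::zeros(1, 3, CV_32F);

        Stream stream;                                   // a real (asynchronous) stream
        transformPoints(src, rvec, tvec, dst, stream);   // enqueued, returns immediately
        stream.waitForCompletion();                      // explicit synchronization point
        return 0;
    }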

@@ -47,8 +47,7 @@ using namespace cv::gpu;

#if !defined (HAVE_CUDA)

-void cv::gpu::cvtColor(const GpuMat&, GpuMat&, int, int) { throw_nogpu(); }
-void cv::gpu::cvtColor(const GpuMat&, GpuMat&, int, int, const Stream&) { throw_nogpu(); }
+void cv::gpu::cvtColor(const GpuMat&, GpuMat&, int, int, Stream&) { throw_nogpu(); }

#else /* !defined (HAVE_CUDA) */

@@ -455,12 +454,7 @@ namespace
    }
}

-void cv::gpu::cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn)
-{
-    cvtColor_caller(src, dst, code, dcn, 0);
-}
-
-void cv::gpu::cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn, const Stream& stream)
+void cv::gpu::cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn, Stream& stream)
{
    cvtColor_caller(src, dst, code, dcn, StreamAccessor::getStream(stream));
}
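Note: with cvtColor taking a Stream, several operations can be chained on one stream so they execute back-to-back on the device while the host stays free. A hedged sketch (the pinned-memory staging via CudaMem is an assumption about the surrounding API, not part of this patch):

    #include <opencv2/gpu/gpu.hpp>
    #include <opencv2/imgproc/imgproc.hpp>

    using namespace cv;
    using namespace cv::gpu;

    void asyncBgrToGray(const Mat& bgr, Mat& gray)
    {
        CudaMem pinned(bgr.rows, bgr.cols, bgr.type());  // page-locked host buffer,
        Mat pinnedHdr = pinned;                          // needed for truly async uploads
        bgr.copyTo(pinnedHdr);

        GpuMat d_bgr, d_gray;
        Stream stream;
        stream.enqueueUpload(pinned, d_bgr);                  // async H2D copy
        gpu::cvtColor(d_bgr, d_gray, CV_BGR2GRAY, 0, stream); // patched signature: ..., dcn, Stream&

        stream.waitForCompletion();                           // wait once, at the end
        d_gray.download(gray);
    }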

@@ -68,19 +68,22 @@ namespace cv { namespace gpu

    template <typename T>
    void blendLinearCaller(int rows, int cols, int cn, const PtrStep_<T> img1, const PtrStep_<T> img2,
-                           const PtrStepf weights1, const PtrStepf weights2, PtrStep_<T> result)
+                           const PtrStepf weights1, const PtrStepf weights2, PtrStep_<T> result, cudaStream_t stream)
    {
        dim3 threads(16, 16);
        dim3 grid(divUp(cols * cn, threads.x), divUp(rows, threads.y));

-        blendLinearKernel<<<grid, threads>>>(rows, cols * cn, cn, img1, img2, weights1, weights2, result);
-        cudaSafeCall(cudaThreadSynchronize());
+        blendLinearKernel<<<grid, threads, 0, stream>>>(rows, cols * cn, cn, img1, img2, weights1, weights2, result);
+        cudaSafeCall( cudaGetLastError() );
+
+        if (stream == 0)
+            cudaSafeCall(cudaDeviceSynchronize());
    }

    template void blendLinearCaller<uchar>(int, int, int, const PtrStep, const PtrStep,
-                                           const PtrStepf, const PtrStepf, PtrStep);
+                                           const PtrStepf, const PtrStepf, PtrStep, cudaStream_t stream);
    template void blendLinearCaller<float>(int, int, int, const PtrStepf, const PtrStepf,
-                                           const PtrStepf, const PtrStepf, PtrStepf);
+                                           const PtrStepf, const PtrStepf, PtrStepf, cudaStream_t stream);


    __global__ void blendLinearKernel8UC4(int rows, int cols, const PtrStep img1, const PtrStep img2,
@@ -105,13 +108,16 @@ namespace cv { namespace gpu


    void blendLinearCaller8UC4(int rows, int cols, const PtrStep img1, const PtrStep img2,
-                               const PtrStepf weights1, const PtrStepf weights2, PtrStep result)
+                               const PtrStepf weights1, const PtrStepf weights2, PtrStep result, cudaStream_t stream)
    {
        dim3 threads(16, 16);
        dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));

-        blendLinearKernel8UC4<<<grid, threads>>>(rows, cols, img1, img2, weights1, weights2, result);
-        cudaSafeCall(cudaThreadSynchronize());
+        blendLinearKernel8UC4<<<grid, threads, 0, stream>>>(rows, cols, img1, img2, weights1, weights2, result);
+        cudaSafeCall( cudaGetLastError() );
+
+        if (stream == 0)
+            cudaSafeCall(cudaDeviceSynchronize());
    }

}}
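Note: the blend kernels above show the launch discipline the whole commit converges on: launch into the supplied stream, check the launch result with cudaGetLastError, and synchronize only when the caller passed the default stream. A self-contained CUDA sketch of that pattern (kernel and names invented for illustration):

    #include <cuda_runtime.h>
    #include <cstdio>

    #define cudaSafeCall(expr) do { cudaError_t e = (expr); if (e != cudaSuccess) \
        std::printf("CUDA error: %s\n", cudaGetErrorString(e)); } while (0)

    __global__ void scaleKernel(float* data, int n, float s)
    {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n)
            data[i] *= s;
    }

    // Launch on the caller's stream; check the launch; block the host only on
    // the synchronous (null-stream) path. cudaDeviceSynchronize is the CUDA 4.0
    // replacement for the deprecated cudaThreadSynchronize.
    void scaleCaller(float* data, int n, float s, cudaStream_t stream)
    {
        dim3 threads(256);
        dim3 grid((n + threads.x - 1) / threads.x);

        scaleKernel<<<grid, threads, 0, stream>>>(data, n, s);
        cudaSafeCall( cudaGetLastError() );

        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }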
@@ -589,7 +589,7 @@ namespace cv { namespace gpu { namespace bfmatcher

    template <int BLOCK_DIM_X, int BLOCK_DIM_Y, typename Dist, typename T, typename Train, typename Mask>
    void matchSimple_caller(const DevMem2D_<T>& queryDescs, const Train& train,
-        const Mask& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance)
+        const Mask& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, cudaStream_t stream)
    {
        StaticAssert<BLOCK_DIM_Y <= 64>::check(); // blockDimY vals must reduce by warp

@@ -597,14 +597,15 @@ namespace cv { namespace gpu { namespace bfmatcher
        dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y, 1);

        match<BLOCK_DIM_X, BLOCK_DIM_Y, ReduceDescCalculatorSimple<BLOCK_DIM_X, T>, Dist, T>
-            <<<grid, threads>>>(queryDescs, train, mask, trainIdx.data, imgIdx.data, distance.data);
+            <<<grid, threads, 0, stream>>>(queryDescs, train, mask, trainIdx.data, imgIdx.data, distance.data);
        cudaSafeCall( cudaGetLastError() );

-        cudaSafeCall( cudaThreadSynchronize() );
+        if (stream == 0)
+            cudaSafeCall( cudaDeviceSynchronize() );
    }
    template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int MAX_DESCRIPTORS_LEN, bool DESC_LEN_EQ_MAX_LEN, typename Dist, typename T, typename Train, typename Mask>
    void matchCached_caller(const DevMem2D_<T>& queryDescs, const Train& train,
-        const Mask& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance)
+        const Mask& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, cudaStream_t stream)
    {
        StaticAssert<BLOCK_DIM_Y <= 64>::check(); // blockDimY vals must reduce by warp
        StaticAssert<BLOCK_DIM_X * BLOCK_DIM_Y >= MAX_DESCRIPTORS_LEN>::check(); // block size must be greater than descriptors length
@@ -614,10 +615,11 @@ namespace cv { namespace gpu { namespace bfmatcher
        dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y, 1);

        match<BLOCK_DIM_X, BLOCK_DIM_Y, ReduceDescCalculatorCached<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN, DESC_LEN_EQ_MAX_LEN, T, typename Dist::ValueType>, Dist, T>
-            <<<grid, threads>>>(queryDescs, train, mask, trainIdx.data, imgIdx.data, distance.data);
+            <<<grid, threads, 0, stream>>>(queryDescs, train, mask, trainIdx.data, imgIdx.data, distance.data);
        cudaSafeCall( cudaGetLastError() );

-        cudaSafeCall( cudaThreadSynchronize() );
+        if (stream == 0)
+            cudaSafeCall( cudaDeviceSynchronize() );
    }

    ///////////////////////////////////////////////////////////////////////////////
@@ -626,167 +628,165 @@ namespace cv { namespace gpu { namespace bfmatcher
    template <typename Dist, typename T, typename Train, typename Mask>
    void matchDispatcher(const DevMem2D_<T>& queryDescs, const Train& train,
        const Mask& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
-        bool cc_12)
+        bool cc_12, cudaStream_t stream)
    {
        if (queryDescs.cols < 64)
-            matchCached_caller<16, 16, 64, false, Dist>(queryDescs, train, mask, trainIdx, imgIdx, distance);
+            matchCached_caller<16, 16, 64, false, Dist>(queryDescs, train, mask, trainIdx, imgIdx, distance, stream);
        else if (queryDescs.cols == 64)
-            matchCached_caller<16, 16, 64, true, Dist>(queryDescs, train, mask, trainIdx, imgIdx, distance);
+            matchCached_caller<16, 16, 64, true, Dist>(queryDescs, train, mask, trainIdx, imgIdx, distance, stream);
        else if (queryDescs.cols < 128)
-            matchCached_caller<16, 16, 128, false, Dist>(queryDescs, train, mask, trainIdx, imgIdx, distance);
+            matchCached_caller<16, 16, 128, false, Dist>(queryDescs, train, mask, trainIdx, imgIdx, distance, stream);
        else if (queryDescs.cols == 128)
-            matchCached_caller<16, 16, 128, true, Dist>(queryDescs, train, mask, trainIdx, imgIdx, distance);
+            matchCached_caller<16, 16, 128, true, Dist>(queryDescs, train, mask, trainIdx, imgIdx, distance, stream);
        else if (queryDescs.cols < 256)
-            matchCached_caller<16, 16, 256, false, Dist>(queryDescs, train, mask, trainIdx, imgIdx, distance);
+            matchCached_caller<16, 16, 256, false, Dist>(queryDescs, train, mask, trainIdx, imgIdx, distance, stream);
        else if (queryDescs.cols == 256 && cc_12)
-            matchCached_caller<16, 16, 256, true, Dist>(queryDescs, train, mask, trainIdx, imgIdx, distance);
+            matchCached_caller<16, 16, 256, true, Dist>(queryDescs, train, mask, trainIdx, imgIdx, distance, stream);
        else
-            matchSimple_caller<16, 16, Dist>(queryDescs, train, mask, trainIdx, imgIdx, distance);
+            matchSimple_caller<16, 16, Dist>(queryDescs, train, mask, trainIdx, imgIdx, distance, stream);

-        cudaSafeCall( cudaThreadSynchronize() );
    }

    template <typename T>
    void matchSingleL1_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs,
        const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
-        bool cc_12)
+        bool cc_12, cudaStream_t stream)
    {
        SingleTrain<T> train((DevMem2D_<T>)trainDescs);
        if (mask.data)
        {
            SingleMask m(mask);
-            matchDispatcher< L1Dist<T> >((DevMem2D_<T>)queryDescs, train, m, trainIdx, imgIdx, distance, cc_12);
+            matchDispatcher< L1Dist<T> >((DevMem2D_<T>)queryDescs, train, m, trainIdx, imgIdx, distance, cc_12, stream);
        }
        else
        {
-            matchDispatcher< L1Dist<T> >((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance, cc_12);
+            matchDispatcher< L1Dist<T> >((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance, cc_12, stream);
        }
    }

-    template void matchSingleL1_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
-    template void matchSingleL1_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
-    template void matchSingleL1_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
-    template void matchSingleL1_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
-    template void matchSingleL1_gpu<int   >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
-    template void matchSingleL1_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
+    template void matchSingleL1_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12, cudaStream_t stream);
+    template void matchSingleL1_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12, cudaStream_t stream);
+    template void matchSingleL1_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12, cudaStream_t stream);
+    template void matchSingleL1_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12, cudaStream_t stream);
+    template void matchSingleL1_gpu<int   >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12, cudaStream_t stream);
+    template void matchSingleL1_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12, cudaStream_t stream);

    template <typename T>
    void matchSingleL2_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs,
        const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
-        bool cc_12)
+        bool cc_12, cudaStream_t stream)
    {
        SingleTrain<T> train((DevMem2D_<T>)trainDescs);
        if (mask.data)
        {
            SingleMask m(mask);
-            matchDispatcher<L2Dist>((DevMem2D_<T>)queryDescs, train, m, trainIdx, imgIdx, distance, cc_12);
+            matchDispatcher<L2Dist>((DevMem2D_<T>)queryDescs, train, m, trainIdx, imgIdx, distance, cc_12, stream);
        }
        else
        {
-            matchDispatcher<L2Dist>((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance, cc_12);
+            matchDispatcher<L2Dist>((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance, cc_12, stream);
        }
    }

-    template void matchSingleL2_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
-    template void matchSingleL2_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
-    template void matchSingleL2_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
-    template void matchSingleL2_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
-    template void matchSingleL2_gpu<int   >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
-    template void matchSingleL2_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
+    template void matchSingleL2_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12, cudaStream_t stream);
+    template void matchSingleL2_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12, cudaStream_t stream);
+    template void matchSingleL2_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12, cudaStream_t stream);
+    template void matchSingleL2_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12, cudaStream_t stream);
+    template void matchSingleL2_gpu<int   >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12, cudaStream_t stream);
+    template void matchSingleL2_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12, cudaStream_t stream);

    template <typename T>
    void matchSingleHamming_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs,
        const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
-        bool cc_12)
+        bool cc_12, cudaStream_t stream)
    {
        SingleTrain<T> train((DevMem2D_<T>)trainDescs);
        if (mask.data)
        {
            SingleMask m(mask);
-            matchDispatcher<HammingDist>((DevMem2D_<T>)queryDescs, train, m, trainIdx, imgIdx, distance, cc_12);
+            matchDispatcher<HammingDist>((DevMem2D_<T>)queryDescs, train, m, trainIdx, imgIdx, distance, cc_12, stream);
        }
        else
        {
-            matchDispatcher<HammingDist>((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance, cc_12);
+            matchDispatcher<HammingDist>((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance, cc_12, stream);
        }
    }

-    template void matchSingleHamming_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
-    template void matchSingleHamming_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
-    template void matchSingleHamming_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
-    template void matchSingleHamming_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
-    template void matchSingleHamming_gpu<int   >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
+    template void matchSingleHamming_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12, cudaStream_t stream);
+    template void matchSingleHamming_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12, cudaStream_t stream);
+    template void matchSingleHamming_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12, cudaStream_t stream);
+    template void matchSingleHamming_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12, cudaStream_t stream);
+    template void matchSingleHamming_gpu<int   >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12, cudaStream_t stream);

    template <typename T>
    void matchCollectionL1_gpu(const DevMem2D& queryDescs, const DevMem2D& trainCollection,
        const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx,
-        const DevMem2Df& distance, bool cc_12)
+        const DevMem2Df& distance, bool cc_12, cudaStream_t stream)
    {
        TrainCollection<T> train((DevMem2D_<T>*)trainCollection.ptr(), trainCollection.cols, queryDescs.cols);
        if (maskCollection.data)
        {
            MaskCollection mask(maskCollection.data);
-            matchDispatcher< L1Dist<T> >((DevMem2D_<T>)queryDescs, train, mask, trainIdx, imgIdx, distance, cc_12);
+            matchDispatcher< L1Dist<T> >((DevMem2D_<T>)queryDescs, train, mask, trainIdx, imgIdx, distance, cc_12, stream);
        }
        else
        {
-            matchDispatcher< L1Dist<T> >((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance, cc_12);
+            matchDispatcher< L1Dist<T> >((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance, cc_12, stream);
        }
    }

-    template void matchCollectionL1_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
-    template void matchCollectionL1_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
-    template void matchCollectionL1_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
-    template void matchCollectionL1_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
-    template void matchCollectionL1_gpu<int   >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
-    template void matchCollectionL1_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
+    template void matchCollectionL1_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12, cudaStream_t stream);
+    template void matchCollectionL1_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12, cudaStream_t stream);
+    template void matchCollectionL1_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12, cudaStream_t stream);
+    template void matchCollectionL1_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12, cudaStream_t stream);
+    template void matchCollectionL1_gpu<int   >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12, cudaStream_t stream);
+    template void matchCollectionL1_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12, cudaStream_t stream);

    template <typename T>
    void matchCollectionL2_gpu(const DevMem2D& queryDescs, const DevMem2D& trainCollection,
        const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx,
-        const DevMem2Df& distance, bool cc_12)
+        const DevMem2Df& distance, bool cc_12, cudaStream_t stream)
    {
        TrainCollection<T> train((DevMem2D_<T>*)trainCollection.ptr(), trainCollection.cols, queryDescs.cols);
        if (maskCollection.data)
        {
            MaskCollection mask(maskCollection.data);
-            matchDispatcher<L2Dist>((DevMem2D_<T>)queryDescs, train, mask, trainIdx, imgIdx, distance, cc_12);
+            matchDispatcher<L2Dist>((DevMem2D_<T>)queryDescs, train, mask, trainIdx, imgIdx, distance, cc_12, stream);
        }
        else
        {
-            matchDispatcher<L2Dist>((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance, cc_12);
+            matchDispatcher<L2Dist>((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance, cc_12, stream);
        }
    }

-    template void matchCollectionL2_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
-    template void matchCollectionL2_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
-    template void matchCollectionL2_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
-    template void matchCollectionL2_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
-    template void matchCollectionL2_gpu<int   >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
-    template void matchCollectionL2_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
+    template void matchCollectionL2_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12, cudaStream_t stream);
+    template void matchCollectionL2_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12, cudaStream_t stream);
+    template void matchCollectionL2_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12, cudaStream_t stream);
+    template void matchCollectionL2_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12, cudaStream_t stream);
+    template void matchCollectionL2_gpu<int   >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12, cudaStream_t stream);
+    template void matchCollectionL2_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12, cudaStream_t stream);

    template <typename T>
    void matchCollectionHamming_gpu(const DevMem2D& queryDescs, const DevMem2D& trainCollection,
        const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx,
-        const DevMem2Df& distance, bool cc_12)
+        const DevMem2Df& distance, bool cc_12, cudaStream_t stream)
    {
        TrainCollection<T> train((DevMem2D_<T>*)trainCollection.ptr(), trainCollection.cols, queryDescs.cols);
        if (maskCollection.data)
        {
            MaskCollection mask(maskCollection.data);
-            matchDispatcher<HammingDist>((DevMem2D_<T>)queryDescs, train, mask, trainIdx, imgIdx, distance, cc_12);
+            matchDispatcher<HammingDist>((DevMem2D_<T>)queryDescs, train, mask, trainIdx, imgIdx, distance, cc_12, stream);
        }
        else
        {
-            matchDispatcher<HammingDist>((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance, cc_12);
+            matchDispatcher<HammingDist>((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance, cc_12, stream);
        }
    }

-    template void matchCollectionHamming_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
-    template void matchCollectionHamming_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
-    template void matchCollectionHamming_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
-    template void matchCollectionHamming_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
-    template void matchCollectionHamming_gpu<int   >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
+    template void matchCollectionHamming_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12, cudaStream_t stream);
+    template void matchCollectionHamming_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12, cudaStream_t stream);
+    template void matchCollectionHamming_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12, cudaStream_t stream);
+    template void matchCollectionHamming_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12, cudaStream_t stream);
+    template void matchCollectionHamming_gpu<int   >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12, cudaStream_t stream);

    ///////////////////////////////////////////////////////////////////////////////////
    //////////////////////////////////// Knn Match ////////////////////////////////////
@@ -833,16 +833,17 @@ namespace cv { namespace gpu { namespace bfmatcher

    template <int BLOCK_DIM_X, int BLOCK_DIM_Y, typename Dist, typename T, typename Mask>
    void calcDistance_caller(const DevMem2D_<T>& queryDescs, const DevMem2D_<T>& trainDescs,
-        const Mask& mask, const DevMem2Df& distance)
+        const Mask& mask, const DevMem2Df& distance, cudaStream_t stream)
    {
        dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y, 1);
        dim3 grid(queryDescs.rows, divUp(trainDescs.rows, BLOCK_DIM_Y), 1);

-        calcDistance<BLOCK_DIM_X, BLOCK_DIM_Y, Dist, T><<<grid, threads>>>(
+        calcDistance<BLOCK_DIM_X, BLOCK_DIM_Y, Dist, T><<<grid, threads, 0, stream>>>(
            queryDescs, trainDescs, mask, distance);
        cudaSafeCall( cudaGetLastError() );

-        cudaSafeCall( cudaThreadSynchronize() );
+        if (stream == 0)
+            cudaSafeCall( cudaDeviceSynchronize() );
    }

    ///////////////////////////////////////////////////////////////////////////////
||||||
@@ -1010,105 +1011,106 @@ namespace cv { namespace gpu { namespace bfmatcher
 // find knn match kernel caller

 template <int BLOCK_SIZE>
-void findKnnMatch_caller(int knn, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist)
+void findKnnMatch_caller(int knn, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream)
 {
     dim3 threads(BLOCK_SIZE, 1, 1);
     dim3 grid(trainIdx.rows, 1, 1);

     for (int i = 0; i < knn; ++i)
     {
-        findBestMatch<BLOCK_SIZE><<<grid, threads>>>(allDist, i, trainIdx, distance);
+        findBestMatch<BLOCK_SIZE><<<grid, threads, 0, stream>>>(allDist, i, trainIdx, distance);
         cudaSafeCall( cudaGetLastError() );
     }

-    cudaSafeCall( cudaThreadSynchronize() );
+    if (stream == 0)
+        cudaSafeCall( cudaDeviceSynchronize() );
 }

 ///////////////////////////////////////////////////////////////////////////////
 // knn match caller

 template <typename Dist, typename T, typename Mask>
-void calcDistanceDispatcher(const DevMem2D_<T>& queryDescs, const DevMem2D_<T>& trainDescs, const Mask& mask, const DevMem2Df& allDist)
+void calcDistanceDispatcher(const DevMem2D_<T>& queryDescs, const DevMem2D_<T>& trainDescs, const Mask& mask, const DevMem2Df& allDist, cudaStream_t stream)
 {
-    calcDistance_caller<16, 16, Dist>(queryDescs, trainDescs, mask, allDist);
+    calcDistance_caller<16, 16, Dist>(queryDescs, trainDescs, mask, allDist, stream);
 }

-void findKnnMatchDispatcher(int knn, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist)
+void findKnnMatchDispatcher(int knn, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream)
 {
-    findKnnMatch_caller<256>(knn, trainIdx, distance, allDist);
+    findKnnMatch_caller<256>(knn, trainIdx, distance, allDist, stream);
 }

 template <typename T>
 void knnMatchL1_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn,
-    const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist)
+    const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream)
 {
     if (mask.data)
     {
-        calcDistanceDispatcher< L1Dist<T> >((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs, SingleMask(mask), allDist);
+        calcDistanceDispatcher< L1Dist<T> >((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs, SingleMask(mask), allDist, stream);
     }
     else
     {
-        calcDistanceDispatcher< L1Dist<T> >((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs, WithOutMask(), allDist);
+        calcDistanceDispatcher< L1Dist<T> >((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs, WithOutMask(), allDist, stream);
     }

-    findKnnMatchDispatcher(knn, trainIdx, distance, allDist);
+    findKnnMatchDispatcher(knn, trainIdx, distance, allDist, stream);
 }

-template void knnMatchL1_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
-template void knnMatchL1_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
-template void knnMatchL1_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
-template void knnMatchL1_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
-template void knnMatchL1_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
-template void knnMatchL1_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
+template void knnMatchL1_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);
+template void knnMatchL1_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);
+template void knnMatchL1_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);
+template void knnMatchL1_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);
+template void knnMatchL1_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);
+template void knnMatchL1_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);

 template <typename T>
 void knnMatchL2_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn,
-    const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist)
+    const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream)
 {
     if (mask.data)
     {
         calcDistanceDispatcher<L2Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
-            SingleMask(mask), allDist);
+            SingleMask(mask), allDist, stream);
     }
     else
     {
         calcDistanceDispatcher<L2Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
-            WithOutMask(), allDist);
+            WithOutMask(), allDist, stream);
     }

-    findKnnMatchDispatcher(knn, trainIdx, distance, allDist);
+    findKnnMatchDispatcher(knn, trainIdx, distance, allDist, stream);
 }

-template void knnMatchL2_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
-template void knnMatchL2_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
-template void knnMatchL2_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
-template void knnMatchL2_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
-template void knnMatchL2_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
-template void knnMatchL2_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
+template void knnMatchL2_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);
+template void knnMatchL2_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);
+template void knnMatchL2_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);
+template void knnMatchL2_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);
+template void knnMatchL2_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);
+template void knnMatchL2_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);

 template <typename T>
 void knnMatchHamming_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn,
-    const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist)
+    const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream)
 {
     if (mask.data)
     {
         calcDistanceDispatcher<HammingDist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
-            SingleMask(mask), allDist);
+            SingleMask(mask), allDist, stream);
     }
     else
     {
         calcDistanceDispatcher<HammingDist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
-            WithOutMask(), allDist);
+            WithOutMask(), allDist, stream);
     }

-    findKnnMatchDispatcher(knn, trainIdx, distance, allDist);
+    findKnnMatchDispatcher(knn, trainIdx, distance, allDist, stream);
 }

-template void knnMatchHamming_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
-template void knnMatchHamming_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
-template void knnMatchHamming_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
-template void knnMatchHamming_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
-template void knnMatchHamming_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
+template void knnMatchHamming_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);
+template void knnMatchHamming_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);
+template void knnMatchHamming_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);
+template void knnMatchHamming_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);
+template void knnMatchHamming_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);

 ///////////////////////////////////////////////////////////////////////////////////
 /////////////////////////////////// Radius Match //////////////////////////////////
@@ -1166,16 +1168,17 @@ namespace cv { namespace gpu { namespace bfmatcher
 template <int BLOCK_DIM_X, int BLOCK_DIM_Y, typename Dist, typename T, typename Mask>
 void radiusMatch_caller(const DevMem2D_<T>& queryDescs, const DevMem2D_<T>& trainDescs,
     float maxDistance, const Mask& mask, const DevMem2Di& trainIdx, unsigned int* nMatches,
-    const DevMem2Df& distance)
+    const DevMem2Df& distance, cudaStream_t stream)
 {
     dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y, 1);
     dim3 grid(queryDescs.rows, divUp(trainDescs.rows, BLOCK_DIM_Y), 1);

-    radiusMatch<BLOCK_DIM_X, BLOCK_DIM_Y, Dist, T><<<grid, threads>>>(
+    radiusMatch<BLOCK_DIM_X, BLOCK_DIM_Y, Dist, T><<<grid, threads, 0, stream>>>(
         queryDescs, trainDescs, maxDistance, mask, trainIdx, nMatches, distance);
     cudaSafeCall( cudaGetLastError() );

-    cudaSafeCall( cudaThreadSynchronize() );
+    if (stream == 0)
+        cudaSafeCall( cudaDeviceSynchronize() );
 }

 ///////////////////////////////////////////////////////////////////////////////
@@ -1184,77 +1187,77 @@ namespace cv { namespace gpu { namespace bfmatcher
 template <typename Dist, typename T, typename Mask>
 void radiusMatchDispatcher(const DevMem2D_<T>& queryDescs, const DevMem2D_<T>& trainDescs,
     float maxDistance, const Mask& mask, const DevMem2Di& trainIdx, unsigned int* nMatches,
-    const DevMem2Df& distance)
+    const DevMem2Df& distance, cudaStream_t stream)
 {
     radiusMatch_caller<16, 16, Dist>(queryDescs, trainDescs, maxDistance, mask,
-        trainIdx, nMatches, distance);
+        trainIdx, nMatches, distance, stream);
 }

 template <typename T>
 void radiusMatchL1_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance,
-    const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance)
+    const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance, cudaStream_t stream)
 {
     if (mask.data)
     {
         radiusMatchDispatcher< L1Dist<T> >((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
-            maxDistance, SingleMask(mask), trainIdx, nMatches, distance);
+            maxDistance, SingleMask(mask), trainIdx, nMatches, distance, stream);
     }
     else
     {
         radiusMatchDispatcher< L1Dist<T> >((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
-            maxDistance, WithOutMask(), trainIdx, nMatches, distance);
+            maxDistance, WithOutMask(), trainIdx, nMatches, distance, stream);
     }
 }

-template void radiusMatchL1_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
-template void radiusMatchL1_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
-template void radiusMatchL1_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
-template void radiusMatchL1_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
-template void radiusMatchL1_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
-template void radiusMatchL1_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
+template void radiusMatchL1_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance, cudaStream_t stream);
+template void radiusMatchL1_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance, cudaStream_t stream);
+template void radiusMatchL1_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance, cudaStream_t stream);
+template void radiusMatchL1_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance, cudaStream_t stream);
+template void radiusMatchL1_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance, cudaStream_t stream);
+template void radiusMatchL1_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance, cudaStream_t stream);

 template <typename T>
 void radiusMatchL2_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance,
-    const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance)
+    const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance, cudaStream_t stream)
 {
     if (mask.data)
     {
         radiusMatchDispatcher<L2Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
-            maxDistance, SingleMask(mask), trainIdx, nMatches, distance);
+            maxDistance, SingleMask(mask), trainIdx, nMatches, distance, stream);
     }
     else
     {
         radiusMatchDispatcher<L2Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
-            maxDistance, WithOutMask(), trainIdx, nMatches, distance);
+            maxDistance, WithOutMask(), trainIdx, nMatches, distance, stream);
     }
 }

-template void radiusMatchL2_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
-template void radiusMatchL2_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
-template void radiusMatchL2_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
-template void radiusMatchL2_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
-template void radiusMatchL2_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
-template void radiusMatchL2_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
+template void radiusMatchL2_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance, cudaStream_t stream);
+template void radiusMatchL2_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance, cudaStream_t stream);
+template void radiusMatchL2_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance, cudaStream_t stream);
+template void radiusMatchL2_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance, cudaStream_t stream);
+template void radiusMatchL2_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance, cudaStream_t stream);
+template void radiusMatchL2_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance, cudaStream_t stream);

 template <typename T>
 void radiusMatchHamming_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance,
-    const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance)
+    const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance, cudaStream_t stream)
 {
     if (mask.data)
     {
         radiusMatchDispatcher<HammingDist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
-            maxDistance, SingleMask(mask), trainIdx, nMatches, distance);
+            maxDistance, SingleMask(mask), trainIdx, nMatches, distance, stream);
     }
     else
     {
         radiusMatchDispatcher<HammingDist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
-            maxDistance, WithOutMask(), trainIdx, nMatches, distance);
+            maxDistance, WithOutMask(), trainIdx, nMatches, distance, stream);
     }
 }

-template void radiusMatchHamming_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
-template void radiusMatchHamming_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
-template void radiusMatchHamming_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
-template void radiusMatchHamming_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
-template void radiusMatchHamming_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
+template void radiusMatchHamming_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance, cudaStream_t stream);
+template void radiusMatchHamming_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance, cudaStream_t stream);
+template void radiusMatchHamming_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance, cudaStream_t stream);
+template void radiusMatchHamming_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance, cudaStream_t stream);
+template void radiusMatchHamming_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance, cudaStream_t stream);
 }}}
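Because the guard only synchronizes when stream == 0, all of these _gpu entry points return immediately when handed a user stream; the caller is then responsible for synchronizing before reading the results. A hedged usage sketch (the device matrices and their setup are assumed, not shown in the patch):

    cudaStream_t stream;
    cudaSafeCall( cudaStreamCreate(&stream) );

    // Queued asynchronously: returns as soon as the kernels are submitted.
    radiusMatchL1_gpu<float>(queryDescs, trainDescs, maxDistance,
                             mask, trainIdx, nMatches, distance, stream);

    // ... other host or device work can overlap here ...

    cudaSafeCall( cudaStreamSynchronize(stream) );  // trainIdx/nMatches/distance are valid after this
    cudaSafeCall( cudaStreamDestroy(stream) );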
@@ -184,7 +184,9 @@ namespace cv { namespace gpu

     computeHypothesisScoresKernel<<<grid, threads, smem_size>>>(
         num_points, object, image, dist_threshold, hypothesis_scores);
-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaGetLastError() );

+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 } // namespace solvepnp_ransac

@@ -64,19 +64,19 @@ namespace cv { namespace gpu { namespace mathfunc
 };

 template <typename T1, typename T2>
-inline void compare_ne(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst)
+inline void compare_ne(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst, cudaStream_t stream)
 {
     NotEqual<T1, T2> op;
-    transform(static_cast< DevMem2D_<T1> >(src1), static_cast< DevMem2D_<T2> >(src2), dst, op, 0);
+    transform(static_cast< DevMem2D_<T1> >(src1), static_cast< DevMem2D_<T2> >(src2), dst, op, stream);
 }

-void compare_ne_8uc4(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst)
+void compare_ne_8uc4(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst, cudaStream_t stream)
 {
-    compare_ne<uint, uint>(src1, src2, dst);
+    compare_ne<uint, uint>(src1, src2, dst, stream);
 }
-void compare_ne_32f(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst)
+void compare_ne_32f(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst, cudaStream_t stream)
 {
-    compare_ne<float, float>(src1, src2, dst);
+    compare_ne<float, float>(src1, src2, dst, stream);
 }

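compare_ne now simply forwards its stream into the generic element-wise transform helper, so the launch and the sync guard live in one place. A reduced sketch of that shape (raw pointers instead of DevMem2D, and transformKernel is illustrative, not OpenCV's actual implementation):

    template <typename T, typename Op>
    __global__ void transformKernel(const T* src1, const T* src2, unsigned char* dst, int n, Op op)
    {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n)
            dst[i] = op(src1[i], src2[i]);            // e.g. NotEqual yields 255 or 0
    }

    template <typename T, typename Op>
    void transform(const T* src1, const T* src2, unsigned char* dst, int n, Op op, cudaStream_t stream)
    {
        transformKernel<<<divUp(n, 256), 256, 0, stream>>>(src1, src2, dst, n, op);
        cudaSafeCall( cudaGetLastError() );
        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }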
@@ -133,7 +133,7 @@ namespace cv { namespace gpu { namespace mathfunc
     cudaSafeCall( cudaGetLastError() );

     if (stream == 0)
-        cudaSafeCall(cudaThreadSynchronize());
+        cudaSafeCall( cudaDeviceSynchronize() );
 }

@@ -165,7 +165,7 @@ namespace cv { namespace gpu { namespace mathfunc
     cudaSafeCall( cudaGetLastError() );

     if (stream == 0)
-        cudaSafeCall(cudaThreadSynchronize());
+        cudaSafeCall( cudaDeviceSynchronize() );
 }

@@ -256,7 +256,7 @@ namespace cv { namespace gpu { namespace mathfunc
     cudaSafeCall( cudaGetLastError() );

     if (stream == 0)
-        cudaSafeCall(cudaThreadSynchronize());
+        cudaSafeCall( cudaDeviceSynchronize() );
 }

@@ -290,7 +290,7 @@ namespace cv { namespace gpu { namespace mathfunc
     cudaSafeCall( cudaGetLastError() );

     if (stream == 0)
-        cudaSafeCall(cudaThreadSynchronize());
+        cudaSafeCall( cudaDeviceSynchronize() );
 }

@@ -94,8 +94,8 @@ namespace filter_krnls

     __shared__ smem_t smem[BLOCK_DIM_Y * BLOCK_DIM_X * 3];

     const int x = BLOCK_DIM_X * blockIdx.x + threadIdx.x;
     const int y = BLOCK_DIM_Y * blockIdx.y + threadIdx.y;

     smem_t* sDataRow = smem + threadIdx.y * BLOCK_DIM_X * 3;

@@ -129,7 +129,7 @@ namespace filter_krnls
 namespace cv { namespace gpu { namespace filters
 {
     template <int ksize, typename T, typename D, template<typename> class B>
-    void linearRowFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor)
+    void linearRowFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor, cudaStream_t stream)
     {
         dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);
         dim3 grid(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));
@@ -143,16 +143,17 @@ namespace cv { namespace gpu { namespace filters
             "try bigger image or another border extrapolation mode", __FILE__, __LINE__);
         }

-        filter_krnls::linearRowFilter<ksize, T, D><<<grid, threads>>>(src, dst, anchor, b);
+        filter_krnls::linearRowFilter<ksize, T, D><<<grid, threads, 0, stream>>>(src, dst, anchor, b);
         cudaSafeCall( cudaGetLastError() );

-        cudaSafeCall( cudaThreadSynchronize() );
+        if (stream == 0)
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

     template <typename T, typename D>
-    void linearRowFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type)
+    void linearRowFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream)
     {
-        typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor);
+        typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor, cudaStream_t stream);
         static const caller_t callers[3][17] =
         {
             {
@@ -216,15 +217,15 @@ namespace cv { namespace gpu { namespace filters

         loadLinearKernel(kernel, ksize);

-        callers[brd_type][ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor);
+        callers[brd_type][ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor, stream);
     }

-    template void linearRowFilter_gpu<uchar , float >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);
-    template void linearRowFilter_gpu<uchar4, float4>(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);
-    template void linearRowFilter_gpu<short , float >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);;
-    template void linearRowFilter_gpu<short2, float2>(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);
-    template void linearRowFilter_gpu<int , float >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);
-    template void linearRowFilter_gpu<float , float >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);
+    template void linearRowFilter_gpu<uchar , float >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
+    template void linearRowFilter_gpu<uchar4, float4>(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
+    template void linearRowFilter_gpu<short , float >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
+    template void linearRowFilter_gpu<short2, float2>(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
+    template void linearRowFilter_gpu<int , float >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
+    template void linearRowFilter_gpu<float , float >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
 }}}

 namespace filter_krnls
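The row- and column-filter entry points share the dispatch idiom above: a caller_t typedef and a static table indexed by border type and kernel size, so adding the stream parameter means changing the typedef once and every specialization picks it up through the table. A self-contained miniature of that idiom (the kernel and names are illustrative, not the patch's):

    __global__ void boxRowKernel(const float* src, float* dst, int n, int ksize)
    {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n)
            dst[i] = src[i];                          // placeholder for the real filtering
    }

    template <int ksize>
    void rowFilter_caller(const float* src, float* dst, int n, cudaStream_t stream)
    {
        boxRowKernel<<<divUp(n, 256), 256, 0, stream>>>(src, dst, n, ksize);
        cudaSafeCall( cudaGetLastError() );
        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }

    typedef void (*caller_t)(const float* src, float* dst, int n, cudaStream_t stream);

    // One entry per supported kernel size; an index picks the specialization at run time.
    static const caller_t callers[] = { rowFilter_caller<3>, rowFilter_caller<5>, rowFilter_caller<7> };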
@@ -234,8 +235,8 @@ namespace filter_krnls
 {
     __shared__ T smem[BLOCK_DIM_Y * BLOCK_DIM_X * 3];

     const int x = BLOCK_DIM_X * blockIdx.x + threadIdx.x;
     const int y = BLOCK_DIM_Y * blockIdx.y + threadIdx.y;

     T* sDataColumn = smem + threadIdx.x;

@@ -269,7 +270,7 @@ namespace filter_krnls
 namespace cv { namespace gpu { namespace filters
 {
     template <int ksize, typename T, typename D, template<typename> class B>
-    void linearColumnFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor)
+    void linearColumnFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor, cudaStream_t stream)
     {
         dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);
         dim3 grid(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));
@@ -282,16 +283,17 @@ namespace cv { namespace gpu { namespace filters
             "try bigger image or another border extrapolation mode", __FILE__, __LINE__);
         }

-        filter_krnls::linearColumnFilter<ksize, T, D><<<grid, threads>>>(src, dst, anchor, b);
+        filter_krnls::linearColumnFilter<ksize, T, D><<<grid, threads, 0, stream>>>(src, dst, anchor, b);
         cudaSafeCall( cudaGetLastError() );

-        cudaSafeCall( cudaThreadSynchronize() );
+        if (stream == 0)
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

     template <typename T, typename D>
-    void linearColumnFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type)
+    void linearColumnFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream)
     {
-        typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor);
+        typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor, cudaStream_t stream);
         static const caller_t callers[3][17] =
         {
             {
@@ -355,15 +357,15 @@ namespace cv { namespace gpu { namespace filters

         loadLinearKernel(kernel, ksize);

-        callers[brd_type][ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor);
+        callers[brd_type][ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor, stream);
     }

-    template void linearColumnFilter_gpu<float , uchar >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);
-    template void linearColumnFilter_gpu<float4, uchar4>(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);
-    template void linearColumnFilter_gpu<float , short >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);
-    template void linearColumnFilter_gpu<float2, short2>(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);
-    template void linearColumnFilter_gpu<float , int >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);
-    template void linearColumnFilter_gpu<float , float >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);
+    template void linearColumnFilter_gpu<float , uchar >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
+    template void linearColumnFilter_gpu<float4, uchar4>(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
+    template void linearColumnFilter_gpu<float , short >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
+    template void linearColumnFilter_gpu<float2, short2>(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
+    template void linearColumnFilter_gpu<float , int >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
+    template void linearColumnFilter_gpu<float , float >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
 }}}

 /////////////////////////////////////////////////////////////////////////////////////////////////
@@ -541,7 +543,7 @@ namespace cv { namespace gpu { namespace bf
     }

     if (stream != 0)
-        cudaSafeCall( cudaThreadSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }

 void bilateral_filter_gpu(const DevMem2D& disp, const DevMem2D& img, int channels, int iters, cudaStream_t stream)
@@ -220,7 +220,7 @@ void compute_hists(int nbins, int block_stride_x, int block_stride_y,
         img_block_width, grad, qangle, scale, block_hists);
     cudaSafeCall( cudaGetLastError() );

-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 }

@@ -324,7 +324,7 @@ void normalize_hists(int nbins, int block_stride_x, int block_stride_y,

     cudaSafeCall( cudaGetLastError() );

-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 }

@@ -418,7 +418,7 @@ void classify_hists(int win_height, int win_width, int block_stride_y, int block
         block_hists, coefs, free_coef, threshold, labels);
     cudaSafeCall( cudaGetLastError() );

-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 }

 //----------------------------------------------------------------------------
@@ -463,7 +463,7 @@ void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, i
         img_block_width, win_block_stride_x, win_block_stride_y, block_hists, descriptors);
     cudaSafeCall( cudaGetLastError() );

-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 }

@@ -512,7 +512,7 @@ void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, i
         img_block_width, win_block_stride_x, win_block_stride_y, block_hists, descriptors);
     cudaSafeCall( cudaGetLastError() );

-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 }

 //----------------------------------------------------------------------------
@@ -636,7 +636,8 @@ void compute_gradients_8UC4(int nbins, int height, int width, const DevMem2D& im
     compute_gradients_8UC4_kernel<nthreads, 0><<<gdim, bdim>>>(height, width, img, angle_scale, grad, qangle);

     cudaSafeCall( cudaGetLastError() );
-    cudaSafeCall(cudaThreadSynchronize());
+
+    cudaSafeCall( cudaDeviceSynchronize() );
 }

 template <int nthreads, int correct_gamma>
@@ -707,7 +708,8 @@ void compute_gradients_8UC1(int nbins, int height, int width, const DevMem2D& im
     compute_gradients_8UC1_kernel<nthreads, 0><<<gdim, bdim>>>(height, width, img, angle_scale, grad, qangle);

     cudaSafeCall( cudaGetLastError() );
-    cudaSafeCall(cudaThreadSynchronize());
+
+    cudaSafeCall( cudaDeviceSynchronize() );
 }

@@ -765,7 +767,9 @@ static void resize_for_hog(const DevMem2D& src, DevMem2D dst, TEX& tex)

     resize_for_hog_kernel<<<grid, threads>>>(sx, sy, (DevMem2D_<T>)dst, colOfs);
     cudaSafeCall( cudaGetLastError() );
-    cudaSafeCall( cudaThreadSynchronize() );
+
+    cudaSafeCall( cudaDeviceSynchronize() );

     cudaSafeCall( cudaUnbindTexture(tex) );
 }

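The HOG and resize helpers above take no stream argument yet, so the patch only swaps the deprecated call and keeps them synchronous. Note the ordering: cudaGetLastError() right after the launch reports launch failures (bad configuration, missing kernel image), while the sync surfaces errors raised during execution. The idiom in isolation (toy kernel, illustrative names):

    __global__ void histKernel(float* block_hists, int n)
    {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n)
            block_hists[i] = 0.f;                     // placeholder body
    }

    void hist_helper(float* d_hists, int n)
    {
        histKernel<<<divUp(n, 256), 256>>>(d_hists, n);
        cudaSafeCall( cudaGetLastError() );           // launch-time errors
        cudaSafeCall( cudaDeviceSynchronize() );      // execution-time errors, reported at the sync point
    }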
@@ -139,7 +139,7 @@ namespace cv { namespace gpu { namespace imgproc
     remap_1c<<<grid, threads>>>(xmap.data, ymap.data, xmap.step, dst.data, dst.step, dst.cols, dst.rows);
     cudaSafeCall( cudaGetLastError() );

-    cudaSafeCall( cudaThreadSynchronize() );
+    cudaSafeCall( cudaDeviceSynchronize() );
     cudaSafeCall( cudaUnbindTexture(tex_remap) );
 }

@@ -153,7 +153,7 @@ namespace cv { namespace gpu { namespace imgproc
     remap_3c<<<grid, threads>>>(src.data, src.step, xmap.data, ymap.data, xmap.step, dst.data, dst.step, dst.cols, dst.rows);
     cudaSafeCall( cudaGetLastError() );

-    cudaSafeCall( cudaThreadSynchronize() );
+    cudaSafeCall( cudaDeviceSynchronize() );
 }

 /////////////////////////////////// MeanShiftfiltering ///////////////////////////////////////////////
@@ -263,7 +263,7 @@ namespace cv { namespace gpu { namespace imgproc
     meanshift_kernel<<< grid, threads >>>( dst.data, dst.step, dst.cols, dst.rows, sp, sr, maxIter, eps );
     cudaSafeCall( cudaGetLastError() );

-    cudaSafeCall( cudaThreadSynchronize() );
+    cudaSafeCall( cudaDeviceSynchronize() );
     cudaSafeCall( cudaUnbindTexture( tex_meanshift ) );
 }
 extern "C" void meanShiftProc_gpu(const DevMem2D& src, DevMem2D dstr, DevMem2D dstsp, int sp, int sr, int maxIter, float eps)
@@ -279,7 +279,7 @@ namespace cv { namespace gpu { namespace imgproc
     meanshiftproc_kernel<<< grid, threads >>>( dstr.data, dstr.step, dstsp.data, dstsp.step, dstr.cols, dstr.rows, sp, sr, maxIter, eps );
     cudaSafeCall( cudaGetLastError() );

-    cudaSafeCall( cudaThreadSynchronize() );
+    cudaSafeCall( cudaDeviceSynchronize() );
     cudaSafeCall( cudaUnbindTexture( tex_meanshift ) );
 }

@@ -397,7 +397,7 @@ namespace cv { namespace gpu { namespace imgproc
     cudaSafeCall( cudaGetLastError() );

     if (stream == 0)
-        cudaSafeCall( cudaThreadSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }

 void drawColorDisp_gpu(const DevMem2D_<short>& src, const DevMem2D& dst, int ndisp, const cudaStream_t& stream)
@@ -411,7 +411,7 @@ namespace cv { namespace gpu { namespace imgproc
     cudaSafeCall( cudaGetLastError() );

     if (stream == 0)
-        cudaSafeCall( cudaThreadSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }

 /////////////////////////////////// reprojectImageTo3D ///////////////////////////////////////////////
@@ -462,7 +462,7 @@ namespace cv { namespace gpu { namespace imgproc
     cudaSafeCall( cudaGetLastError() );

     if (stream == 0)
-        cudaSafeCall( cudaThreadSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }

 void reprojectImageTo3D_gpu(const DevMem2D& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream)
@@ -502,7 +502,7 @@ namespace cv { namespace gpu { namespace imgproc
     extractCovData_kernel<<<grid, threads>>>(Dx.cols, Dx.rows, Dx, Dy, dst);
     cudaSafeCall( cudaGetLastError() );

-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 }

 /////////////////////////////////////////// Corner Harris /////////////////////////////////////////////////
@@ -611,7 +611,8 @@ namespace cv { namespace gpu { namespace imgproc

     cudaSafeCall( cudaGetLastError() );

-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );

     cudaSafeCall(cudaUnbindTexture(harrisDxTex));
     cudaSafeCall(cudaUnbindTexture(harrisDyTex));
 }
@@ -727,7 +728,8 @@ namespace cv { namespace gpu { namespace imgproc

     cudaSafeCall( cudaGetLastError() );

-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall(cudaDeviceSynchronize());

     cudaSafeCall(cudaUnbindTexture(minEigenValDxTex));
     cudaSafeCall(cudaUnbindTexture(minEigenValDyTex));
 }
@@ -763,7 +765,7 @@ namespace cv { namespace gpu { namespace imgproc
     column_sumKernel_32F<<<grid, threads>>>(src.cols, src.rows, src, dst);
     cudaSafeCall( cudaGetLastError() );

-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 }

 //////////////////////////////////////////////////////////////////////////
@@ -791,7 +793,7 @@ namespace cv { namespace gpu { namespace imgproc
     mulSpectrumsKernel<<<grid, threads>>>(a, b, c);
     cudaSafeCall( cudaGetLastError() );

-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 }

 //////////////////////////////////////////////////////////////////////////
@@ -820,7 +822,7 @@ namespace cv { namespace gpu { namespace imgproc
     mulSpectrumsKernel_CONJ<<<grid, threads>>>(a, b, c);
     cudaSafeCall( cudaGetLastError() );

-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 }

 //////////////////////////////////////////////////////////////////////////
@@ -850,7 +852,7 @@ namespace cv { namespace gpu { namespace imgproc
     mulAndScaleSpectrumsKernel<<<grid, threads>>>(a, b, scale, c);
     cudaSafeCall( cudaGetLastError() );

-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 }

 //////////////////////////////////////////////////////////////////////////
@@ -880,7 +882,7 @@ namespace cv { namespace gpu { namespace imgproc
     mulAndScaleSpectrumsKernel_CONJ<<<grid, threads>>>(a, b, scale, c);
     cudaSafeCall( cudaGetLastError() );

-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 }

 /////////////////////////////////////////////////////////////////////////
@@ -904,7 +906,9 @@ namespace cv { namespace gpu { namespace imgproc
     dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));

     downsampleKernel<<<grid, threads>>>(src, rows, cols, k, dst);
-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaGetLastError() );

+    cudaSafeCall( cudaDeviceSynchronize() );
 }

 template void downsampleCaller(const PtrStep src, int rows, int cols, int k, PtrStep dst);
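Several of the imgproc kernels above read their input through a bound texture; those call sites synchronize unconditionally and only then unbind, since the texture reference must stay bound until the kernel has actually finished. A hedged sketch of that shape (the texture, kernel, and names are illustrative, not from the patch):

    texture<unsigned char, 2, cudaReadModeElementType> tex_src;  // illustrative texture reference

    __global__ void copyKernel(unsigned char* dst, size_t step, int cols, int rows)
    {
        int x = blockIdx.x * blockDim.x + threadIdx.x;
        int y = blockIdx.y * blockDim.y + threadIdx.y;
        if (x < cols && y < rows)
            dst[y * step + x] = tex2D(tex_src, x, y);
    }

    void textured_helper(const unsigned char* src, size_t srcStep, unsigned char* dst, size_t dstStep, int cols, int rows)
    {
        cudaSafeCall( cudaBindTexture2D(0, tex_src, src, cols, rows, srcStep) );

        dim3 threads(16, 16);
        dim3 grid(divUp(cols, 16), divUp(rows, 16));
        copyKernel<<<grid, threads>>>(dst, dstStep, cols, rows);
        cudaSafeCall( cudaGetLastError() );

        cudaSafeCall( cudaDeviceSynchronize() );      // the kernel must finish before the texture goes away
        cudaSafeCall( cudaUnbindTexture(tex_src) );
    }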
@@ -46,6 +46,8 @@
|
|||||||
#include "opencv2/gpu/devmem2d.hpp"
|
#include "opencv2/gpu/devmem2d.hpp"
|
||||||
#include "safe_call.hpp"
|
#include "safe_call.hpp"
|
||||||
#include "cuda_runtime.h"
|
#include "cuda_runtime.h"
|
||||||
|
#include "npp.h"
|
||||||
|
#include "NPP_staging.hpp"
|
||||||
|
|
||||||
namespace cv
|
namespace cv
|
||||||
{
|
{
|
||||||
@@ -106,6 +108,41 @@ namespace cv
|
|||||||
cudaSafeCall( cudaGetTextureReference(&tex, name) );
|
cudaSafeCall( cudaGetTextureReference(&tex, name) );
|
||||||
cudaSafeCall( cudaUnbindTexture(tex) );
|
cudaSafeCall( cudaUnbindTexture(tex) );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class NppStreamHandler
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
inline explicit NppStreamHandler(cudaStream_t newStream = 0)
|
||||||
|
{
|
||||||
|
oldStream = nppGetStream();
|
||||||
|
nppSetStream(newStream);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline ~NppStreamHandler()
|
||||||
|
{
|
||||||
|
nppSetStream(oldStream);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
cudaStream_t oldStream;
|
||||||
|
};
|
||||||
|
|
||||||
|
class NppStStreamHandler
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
inline explicit NppStStreamHandler(cudaStream_t newStream = 0)
|
||||||
|
{
|
||||||
|
oldStream = nppStSetActiveCUDAstream(newStream);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline ~NppStStreamHandler()
|
||||||
|
{
|
||||||
|
nppStSetActiveCUDAstream(oldStream);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
cudaStream_t oldStream;
|
||||||
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
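
The two RAII guards added above swap the active NPP / NPP_staging stream for the lifetime of a scope and restore the previous one on exit. A hedged usage sketch, assuming the NppStreamHandler class from this hunk and a valid pitched 8-bit device image pair; the NPP box-filter primitive is just an example call, not one this patch touches:

    // Runs one NPP primitive on a caller-supplied stream; illustrative only.
    void boxFilterOnStream(const Npp8u* src, int srcStep, Npp8u* dst, int dstStep,
                           NppiSize roi, cudaStream_t stream)
    {
        NppStreamHandler h(stream);       // all NPP calls below enqueue on 'stream'

        NppiSize  mask   = {3, 3};
        NppiPoint anchor = {1, 1};
        NppStatus status = nppiFilterBox_8u_C1R(src, srcStep, dst, dstStep, roi, mask, anchor);
        if (status != NPP_SUCCESS) { /* handle error */ }
    }   // 'h' restores the previously active NPP stream here
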
@@ -134,7 +134,7 @@ void matchTemplateNaive_CCORR_32F(const DevMem2D image, const DevMem2D templ,
     }
     cudaSafeCall( cudaGetLastError() );
 
-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 
@@ -165,7 +165,7 @@ void matchTemplateNaive_CCORR_8U(const DevMem2D image, const DevMem2D templ,
     }
     cudaSafeCall( cudaGetLastError() );
 
-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 
@@ -228,7 +228,7 @@ void matchTemplateNaive_SQDIFF_32F(const DevMem2D image, const DevMem2D templ,
     }
     cudaSafeCall( cudaGetLastError() );
 
-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 
@@ -259,7 +259,7 @@ void matchTemplateNaive_SQDIFF_8U(const DevMem2D image, const DevMem2D templ,
     }
     cudaSafeCall( cudaGetLastError() );
 
-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 
@@ -309,7 +309,7 @@ void matchTemplatePrepared_SQDIFF_8U(
     }
     cudaSafeCall( cudaGetLastError() );
 
-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 
@@ -360,7 +360,7 @@ void matchTemplatePrepared_SQDIFF_NORMED_8U(
     }
     cudaSafeCall( cudaGetLastError() );
 
-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 
@@ -392,7 +392,7 @@ void matchTemplatePrepared_CCOFF_8U(
             w, h, (float)templ_sum / (w * h), image_sum, result);
     cudaSafeCall( cudaGetLastError() );
 
-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 
@@ -434,7 +434,7 @@ void matchTemplatePrepared_CCOFF_8UC2(
             image_sum_r, image_sum_g, result);
     cudaSafeCall( cudaGetLastError() );
 
-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 
@@ -490,7 +490,7 @@ void matchTemplatePrepared_CCOFF_8UC3(
             image_sum_r, image_sum_g, image_sum_b, result);
     cudaSafeCall( cudaGetLastError() );
 
-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 
@@ -556,7 +556,7 @@ void matchTemplatePrepared_CCOFF_8UC4(
             result);
     cudaSafeCall( cudaGetLastError() );
 
-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 
@@ -602,7 +602,7 @@ void matchTemplatePrepared_CCOFF_NORMED_8U(
             image_sum, image_sqsum, result);
     cudaSafeCall( cudaGetLastError() );
 
-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 
@@ -665,7 +665,7 @@ void matchTemplatePrepared_CCOFF_NORMED_8UC2(
             result);
     cudaSafeCall( cudaGetLastError() );
 
-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 
@@ -742,7 +742,7 @@ void matchTemplatePrepared_CCOFF_NORMED_8UC3(
             result);
     cudaSafeCall( cudaGetLastError() );
 
-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 
@@ -833,7 +833,7 @@ void matchTemplatePrepared_CCOFF_NORMED_8UC4(
            result);
     cudaSafeCall( cudaGetLastError() );
 
-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 
@@ -877,7 +877,7 @@ void normalize_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum,
     }
     cudaSafeCall( cudaGetLastError() );
 
-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 
@@ -919,7 +919,7 @@ void extractFirstChannel_32F(const DevMem2D image, DevMem2Df result, int cn)
     }
     cudaSafeCall( cudaGetLastError() );
 
-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 
@@ -153,7 +153,7 @@ namespace cv { namespace gpu { namespace mathfunc
     cudaSafeCall( cudaGetLastError() );
 
     if (stream == 0)
-        cudaSafeCall( cudaThreadSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 void cartToPolar_gpu(const DevMem2Df& x, const DevMem2Df& y, const DevMem2Df& mag, bool magSqr, const DevMem2Df& angle, bool angleInDegrees, cudaStream_t stream)
@@ -202,7 +202,7 @@ namespace cv { namespace gpu { namespace mathfunc
     cudaSafeCall( cudaGetLastError() );
 
     if (stream == 0)
-        cudaSafeCall( cudaThreadSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 void polarToCart_gpu(const DevMem2Df& mag, const DevMem2Df& angle, const DevMem2Df& x, const DevMem2Df& y, bool angleInDegrees, cudaStream_t stream)
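
In the stream-aware callers the blocking wait is now conditional: only the default stream (0) keeps the old synchronous semantics, while launches on a user stream return immediately and leave synchronization to the caller. A sketch of that guard, reusing the illustrative cudaSafeCall macro and kernel shape from the earlier example:

    __global__ void scaleKernel(float* data, int n)
    {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n) data[i] *= 0.5f;
    }

    void scaleAsync(float* d_data, int n, cudaStream_t stream)
    {
        dim3 threads(256);
        dim3 grid((n + threads.x - 1) / threads.x);

        scaleKernel<<<grid, threads, 0, stream>>>(d_data, n);
        cudaSafeCall( cudaGetLastError() );

        if (stream == 0)                              // default stream: old blocking behaviour
            cudaSafeCall( cudaDeviceSynchronize() );  // user streams: caller synchronizes later
    }
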
@@ -87,7 +87,7 @@ namespace cv { namespace gpu { namespace matrix_operations {
     cudaSafeCall( cudaGetLastError() );
 
     if (stream == 0)
-        cudaSafeCall ( cudaThreadSynchronize() );
+        cudaSafeCall ( cudaDeviceSynchronize() );
 }
 
 void copy_to_with_mask(const DevMem2D& mat_src, DevMem2D mat_dst, int depth, const DevMem2D& mask, int channels, const cudaStream_t & stream)
@@ -199,7 +199,7 @@ namespace cv { namespace gpu { namespace matrix_operations {
     cudaSafeCall( cudaGetLastError() );
 
     if (stream == 0)
-        cudaSafeCall ( cudaThreadSynchronize() );
+        cudaSafeCall ( cudaDeviceSynchronize() );
 }
 
 template void set_to_gpu<uchar >(const DevMem2D& mat, const uchar* scalar, const DevMem2D& mask, int channels, cudaStream_t stream);
@@ -222,7 +222,7 @@ namespace cv { namespace gpu { namespace matrix_operations {
     cudaSafeCall( cudaGetLastError() );
 
     if (stream == 0)
-        cudaSafeCall ( cudaThreadSynchronize() );
+        cudaSafeCall ( cudaDeviceSynchronize() );
 }
 
 template void set_to_gpu<uchar >(const DevMem2D& mat, const uchar* scalar, int channels, cudaStream_t stream);
@@ -275,11 +275,11 @@ namespace cv { namespace gpu { namespace mathfunc
     minMaxKernel<256, T, Mask8U><<<grid, threads>>>(src, Mask8U(mask), minval_buf, maxval_buf);
     cudaSafeCall( cudaGetLastError() );
 
-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 
     T minval_, maxval_;
-    cudaSafeCall(cudaMemcpy(&minval_, minval_buf, sizeof(T), cudaMemcpyDeviceToHost));
-    cudaSafeCall(cudaMemcpy(&maxval_, maxval_buf, sizeof(T), cudaMemcpyDeviceToHost));
+    cudaSafeCall( cudaMemcpy(&minval_, minval_buf, sizeof(T), cudaMemcpyDeviceToHost) );
+    cudaSafeCall( cudaMemcpy(&maxval_, maxval_buf, sizeof(T), cudaMemcpyDeviceToHost) );
     *minval = minval_;
     *maxval = maxval_;
 }
@@ -306,11 +306,11 @@ namespace cv { namespace gpu { namespace mathfunc
     minMaxKernel<256, T, MaskTrue><<<grid, threads>>>(src, MaskTrue(), minval_buf, maxval_buf);
     cudaSafeCall( cudaGetLastError() );
 
-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 
     T minval_, maxval_;
-    cudaSafeCall(cudaMemcpy(&minval_, minval_buf, sizeof(T), cudaMemcpyDeviceToHost));
-    cudaSafeCall(cudaMemcpy(&maxval_, maxval_buf, sizeof(T), cudaMemcpyDeviceToHost));
+    cudaSafeCall( cudaMemcpy(&minval_, minval_buf, sizeof(T), cudaMemcpyDeviceToHost) );
+    cudaSafeCall( cudaMemcpy(&maxval_, maxval_buf, sizeof(T), cudaMemcpyDeviceToHost) );
     *minval = minval_;
     *maxval = maxval_;
 }
@@ -363,11 +363,11 @@ namespace cv { namespace gpu { namespace mathfunc
     minMaxPass2Kernel<256, T><<<1, 256>>>(minval_buf, maxval_buf, grid.x * grid.y);
     cudaSafeCall( cudaGetLastError() );
 
-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall(cudaDeviceSynchronize());
 
     T minval_, maxval_;
-    cudaSafeCall(cudaMemcpy(&minval_, minval_buf, sizeof(T), cudaMemcpyDeviceToHost));
-    cudaSafeCall(cudaMemcpy(&maxval_, maxval_buf, sizeof(T), cudaMemcpyDeviceToHost));
+    cudaSafeCall( cudaMemcpy(&minval_, minval_buf, sizeof(T), cudaMemcpyDeviceToHost) );
+    cudaSafeCall( cudaMemcpy(&maxval_, maxval_buf, sizeof(T), cudaMemcpyDeviceToHost) );
     *minval = minval_;
     *maxval = maxval_;
 }
@@ -395,11 +395,11 @@ namespace cv { namespace gpu { namespace mathfunc
     minMaxPass2Kernel<256, T><<<1, 256>>>(minval_buf, maxval_buf, grid.x * grid.y);
     cudaSafeCall( cudaGetLastError() );
 
-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 
     T minval_, maxval_;
-    cudaSafeCall(cudaMemcpy(&minval_, minval_buf, sizeof(T), cudaMemcpyDeviceToHost));
-    cudaSafeCall(cudaMemcpy(&maxval_, maxval_buf, sizeof(T), cudaMemcpyDeviceToHost));
+    cudaSafeCall( cudaMemcpy(&minval_, minval_buf, sizeof(T), cudaMemcpyDeviceToHost) );
+    cudaSafeCall( cudaMemcpy(&maxval_, maxval_buf, sizeof(T), cudaMemcpyDeviceToHost) );
     *minval = minval_;
     *maxval = maxval_;
 }
@@ -609,17 +609,17 @@ namespace cv { namespace gpu { namespace mathfunc
             minloc_buf, maxloc_buf);
     cudaSafeCall( cudaGetLastError() );
 
-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 
     T minval_, maxval_;
-    cudaSafeCall(cudaMemcpy(&minval_, minval_buf, sizeof(T), cudaMemcpyDeviceToHost));
-    cudaSafeCall(cudaMemcpy(&maxval_, maxval_buf, sizeof(T), cudaMemcpyDeviceToHost));
+    cudaSafeCall( cudaMemcpy(&minval_, minval_buf, sizeof(T), cudaMemcpyDeviceToHost) );
+    cudaSafeCall( cudaMemcpy(&maxval_, maxval_buf, sizeof(T), cudaMemcpyDeviceToHost) );
     *minval = minval_;
     *maxval = maxval_;
 
     uint minloc_, maxloc_;
-    cudaSafeCall(cudaMemcpy(&minloc_, minloc_buf, sizeof(int), cudaMemcpyDeviceToHost));
-    cudaSafeCall(cudaMemcpy(&maxloc_, maxloc_buf, sizeof(int), cudaMemcpyDeviceToHost));
+    cudaSafeCall( cudaMemcpy(&minloc_, minloc_buf, sizeof(int), cudaMemcpyDeviceToHost) );
+    cudaSafeCall( cudaMemcpy(&maxloc_, maxloc_buf, sizeof(int), cudaMemcpyDeviceToHost) );
     minloc[1] = minloc_ / src.cols; minloc[0] = minloc_ - minloc[1] * src.cols;
     maxloc[1] = maxloc_ / src.cols; maxloc[0] = maxloc_ - maxloc[1] * src.cols;
 }
@@ -650,7 +650,7 @@ namespace cv { namespace gpu { namespace mathfunc
             minloc_buf, maxloc_buf);
     cudaSafeCall( cudaGetLastError() );
 
-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 
     T minval_, maxval_;
     cudaSafeCall(cudaMemcpy(&minval_, minval_buf, sizeof(T), cudaMemcpyDeviceToHost));
@@ -724,7 +724,7 @@ namespace cv { namespace gpu { namespace mathfunc
     minMaxLocPass2Kernel<256, T><<<1, 256>>>(minval_buf, maxval_buf, minloc_buf, maxloc_buf, grid.x * grid.y);
     cudaSafeCall( cudaGetLastError() );
 
-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 
     T minval_, maxval_;
     cudaSafeCall(cudaMemcpy(&minval_, minval_buf, sizeof(T), cudaMemcpyDeviceToHost));
@@ -766,7 +766,7 @@ namespace cv { namespace gpu { namespace mathfunc
     minMaxLocPass2Kernel<256, T><<<1, 256>>>(minval_buf, maxval_buf, minloc_buf, maxloc_buf, grid.x * grid.y);
     cudaSafeCall( cudaGetLastError() );
 
-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 
     T minval_, maxval_;
     cudaSafeCall(cudaMemcpy(&minval_, minval_buf, sizeof(T), cudaMemcpyDeviceToHost));
@@ -895,7 +895,7 @@ namespace cv { namespace gpu { namespace mathfunc
     countNonZeroKernel<256, T><<<grid, threads>>>(src, count_buf);
     cudaSafeCall( cudaGetLastError() );
 
-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 
     uint count;
     cudaSafeCall(cudaMemcpy(&count, count_buf, sizeof(int), cudaMemcpyDeviceToHost));
@@ -942,7 +942,7 @@ namespace cv { namespace gpu { namespace mathfunc
     countNonZeroPass2Kernel<256, T><<<1, 256>>>(count_buf, grid.x * grid.y);
     cudaSafeCall( cudaGetLastError() );
 
-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 
     uint count;
     cudaSafeCall(cudaMemcpy(&count, count_buf, sizeof(int), cudaMemcpyDeviceToHost));
@@ -1493,7 +1493,7 @@ namespace cv { namespace gpu { namespace mathfunc
 
         break;
     }
-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 
     R result[4] = {0, 0, 0, 0};
     cudaSafeCall(cudaMemcpy(&result, buf.ptr(0), sizeof(R) * cn, cudaMemcpyDeviceToHost));
@@ -1543,7 +1543,7 @@ namespace cv { namespace gpu { namespace mathfunc
     }
     cudaSafeCall( cudaGetLastError() );
 
-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 
     R result[4] = {0, 0, 0, 0};
     cudaSafeCall(cudaMemcpy(&result, buf.ptr(0), sizeof(R) * cn, cudaMemcpyDeviceToHost));
@@ -1615,7 +1615,7 @@ namespace cv { namespace gpu { namespace mathfunc
 
         break;
     }
-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 
     R result[4] = {0, 0, 0, 0};
     cudaSafeCall(cudaMemcpy(result, buf.ptr(0), sizeof(R) * cn, cudaMemcpyDeviceToHost));
@@ -1665,7 +1665,7 @@ namespace cv { namespace gpu { namespace mathfunc
     }
     cudaSafeCall( cudaGetLastError() );
 
-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 
     R result[4] = {0, 0, 0, 0};
     cudaSafeCall(cudaMemcpy(result, buf.ptr(0), sizeof(R) * cn, cudaMemcpyDeviceToHost));
@@ -1737,7 +1737,7 @@ namespace cv { namespace gpu { namespace mathfunc
 
         break;
    }
-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 
     R result[4] = {0, 0, 0, 0};
     cudaSafeCall(cudaMemcpy(result, buf.ptr(0), sizeof(R) * cn, cudaMemcpyDeviceToHost));
@@ -1787,7 +1787,7 @@ namespace cv { namespace gpu { namespace mathfunc
     }
     cudaSafeCall( cudaGetLastError() );
 
-    cudaSafeCall(cudaThreadSynchronize());
+    cudaSafeCall( cudaDeviceSynchronize() );
 
     R result[4] = {0, 0, 0, 0};
     cudaSafeCall(cudaMemcpy(result, buf.ptr(0), sizeof(R) * cn, cudaMemcpyDeviceToHost));
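
Each reduction above follows the same shape: launch, check the launch, synchronize, then read the scalar result back with a blocking cudaMemcpy. A self-contained sketch of that reduce-then-download idiom, again assuming the illustrative cudaSafeCall macro from the first example; the kernel itself is a stand-in (atomicAdd on float needs compute capability 2.0+):

    __global__ void sumKernel(const float* in, int n, float* result)
    {
        __shared__ float cache[256];
        int tid = blockIdx.x * blockDim.x + threadIdx.x;
        cache[threadIdx.x] = (tid < n) ? in[tid] : 0.f;
        __syncthreads();

        // Tree reduction within the block.
        for (int s = blockDim.x / 2; s > 0; s >>= 1)
        {
            if (threadIdx.x < s)
                cache[threadIdx.x] += cache[threadIdx.x + s];
            __syncthreads();
        }
        if (threadIdx.x == 0)
            atomicAdd(result, cache[0]);
    }

    float sumOnDevice(const float* d_in, int n, float* d_result)
    {
        cudaSafeCall( cudaMemset(d_result, 0, sizeof(float)) );
        sumKernel<<<(n + 255) / 256, 256>>>(d_in, n, d_result);
        cudaSafeCall( cudaGetLastError() );
        cudaSafeCall( cudaDeviceSynchronize() );   // surface kernel errors before the copy

        float h_result = 0.f;
        cudaSafeCall( cudaMemcpy(&h_result, d_result, sizeof(float), cudaMemcpyDeviceToHost) );
        return h_result;
    }
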
@@ -236,7 +236,7 @@ namespace cv { namespace gpu { namespace split_merge {
     cudaSafeCall( cudaGetLastError() );
 
     if (stream == 0)
-        cudaSafeCall(cudaThreadSynchronize());
+        cudaSafeCall(cudaDeviceSynchronize());
 }
 
 
@@ -253,7 +253,7 @@ namespace cv { namespace gpu { namespace split_merge {
     cudaSafeCall( cudaGetLastError() );
 
     if (stream == 0)
-        cudaSafeCall(cudaThreadSynchronize());
+        cudaSafeCall(cudaDeviceSynchronize());
 }
 
 
@@ -271,7 +271,7 @@ namespace cv { namespace gpu { namespace split_merge {
     cudaSafeCall( cudaGetLastError() );
 
     if (stream == 0)
-        cudaSafeCall(cudaThreadSynchronize());
+        cudaSafeCall(cudaDeviceSynchronize());
 }
 
 
@@ -445,7 +445,7 @@ namespace cv { namespace gpu { namespace split_merge {
     cudaSafeCall( cudaGetLastError() );
 
     if (stream == 0)
-        cudaSafeCall(cudaThreadSynchronize());
+        cudaSafeCall(cudaDeviceSynchronize());
 }
 
 
@@ -462,7 +462,7 @@ namespace cv { namespace gpu { namespace split_merge {
     cudaSafeCall( cudaGetLastError() );
 
     if (stream == 0)
-        cudaSafeCall(cudaThreadSynchronize());
+        cudaSafeCall(cudaDeviceSynchronize());
 }
 
 
@@ -480,7 +480,7 @@ namespace cv { namespace gpu { namespace split_merge {
     cudaSafeCall( cudaGetLastError() );
 
     if (stream == 0)
-        cudaSafeCall(cudaThreadSynchronize());
+        cudaSafeCall(cudaDeviceSynchronize());
 }
 
 
@@ -102,19 +102,19 @@ __device__ uint2 MinSSD(volatile unsigned int *col_ssd_cache, volatile unsigned
 
     //See above: #define COL_SSD_SIZE (BLOCK_W + 2 * RADIUS)
     ssd[0] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 0 * (BLOCK_W + 2 * RADIUS));
     __syncthreads();
     ssd[1] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 1 * (BLOCK_W + 2 * RADIUS));
     __syncthreads();
     ssd[2] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 2 * (BLOCK_W + 2 * RADIUS));
     __syncthreads();
     ssd[3] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 3 * (BLOCK_W + 2 * RADIUS));
     __syncthreads();
     ssd[4] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 4 * (BLOCK_W + 2 * RADIUS));
     __syncthreads();
     ssd[5] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 5 * (BLOCK_W + 2 * RADIUS));
     __syncthreads();
     ssd[6] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 6 * (BLOCK_W + 2 * RADIUS));
     __syncthreads();
     ssd[7] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 7 * (BLOCK_W + 2 * RADIUS));
 
     int mssd = min(min(min(ssd[0], ssd[1]), min(ssd[4], ssd[5])), min(min(ssd[2], ssd[3]), min(ssd[6], ssd[7])));
@@ -328,7 +328,7 @@ template<int RADIUS> void kernel_caller(const DevMem2D& left, const DevMem2D& ri
     cudaSafeCall( cudaGetLastError() );
 
     if (stream == 0)
-        cudaSafeCall( cudaThreadSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 };
 
 typedef void (*kernel_caller_t)(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int maxdisp, cudaStream_t & stream);
@@ -407,7 +407,7 @@ extern "C" void prefilter_xsobel(const DevMem2D& input, const DevMem2D& output,
     cudaSafeCall( cudaGetLastError() );
 
     if (stream == 0)
-        cudaSafeCall( cudaThreadSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 
     cudaSafeCall( cudaUnbindTexture (texForSobel ) );
 }
@@ -531,10 +531,10 @@ extern "C" void postfilter_textureness(const DevMem2D& input, int winsz, float a
     textureness_kernel<<<grid, threads, smem_size, stream>>>(disp, winsz, avgTexturenessThreshold);
     cudaSafeCall( cudaGetLastError() );
 
     if (stream == 0)
-        cudaSafeCall( cudaThreadSynchronize() );
-    cudaSafeCall( cudaUnbindTexture (texForTF) );
+        cudaSafeCall( cudaDeviceSynchronize() );
 
+    cudaSafeCall( cudaUnbindTexture (texForTF) );
 }
 
 }}}
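
prefilter_xsobel and postfilter_textureness both work through bound texture references, and the unbind runs unconditionally, whether or not the conditional synchronize fired. A hedged sketch of that bind/launch/unbind round trip using the CUDA 4.0-era texture-reference API; all names here are illustrative, not from the patch:

    texture<unsigned char, 2, cudaReadModeElementType> texExample;

    __global__ void copyFromTexture(unsigned char* out, size_t outPitch, int w, int h)
    {
        int x = blockIdx.x * blockDim.x + threadIdx.x;
        int y = blockIdx.y * blockDim.y + threadIdx.y;
        if (x < w && y < h)
            out[y * outPitch + x] = tex2D(texExample, x, y);  // pitch is in bytes == elements for 8u
    }

    void runWithTexture(const unsigned char* d_src, size_t srcPitch,
                        unsigned char* d_dst, size_t dstPitch,
                        int w, int h, cudaStream_t stream)
    {
        cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
        cudaSafeCall( cudaBindTexture2D(0, texExample, d_src, desc, w, h, srcPitch) );

        dim3 threads(16, 16);
        dim3 grid((w + 15) / 16, (h + 15) / 16);
        copyFromTexture<<<grid, threads, 0, stream>>>(d_dst, dstPitch, w, h);
        cudaSafeCall( cudaGetLastError() );

        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );

        cudaSafeCall( cudaUnbindTexture(texExample) );  // always unbind, even for user streams
    }
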
@@ -175,7 +175,7 @@ namespace cv { namespace gpu { namespace bp
     cudaSafeCall( cudaGetLastError() );
 
     if (stream == 0)
-        cudaSafeCall( cudaThreadSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 template <> void comp_data_gpu<uchar, float>(const DevMem2D& left, const DevMem2D& right, const DevMem2D& data, cudaStream_t stream)
 {
@@ -189,7 +189,7 @@ namespace cv { namespace gpu { namespace bp
     cudaSafeCall( cudaGetLastError() );
 
     if (stream == 0)
-        cudaSafeCall( cudaThreadSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 template <> void comp_data_gpu<uchar3, short>(const DevMem2D& left, const DevMem2D& right, const DevMem2D& data, cudaStream_t stream)
@@ -204,7 +204,7 @@ namespace cv { namespace gpu { namespace bp
     cudaSafeCall( cudaGetLastError() );
 
     if (stream == 0)
-        cudaSafeCall( cudaThreadSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 template <> void comp_data_gpu<uchar3, float>(const DevMem2D& left, const DevMem2D& right, const DevMem2D& data, cudaStream_t stream)
 {
@@ -218,7 +218,7 @@ namespace cv { namespace gpu { namespace bp
     cudaSafeCall( cudaGetLastError() );
 
     if (stream == 0)
-        cudaSafeCall( cudaThreadSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 template <> void comp_data_gpu<uchar4, short>(const DevMem2D& left, const DevMem2D& right, const DevMem2D& data, cudaStream_t stream)
@@ -233,7 +233,7 @@ namespace cv { namespace gpu { namespace bp
     cudaSafeCall( cudaGetLastError() );
 
     if (stream == 0)
-        cudaSafeCall( cudaThreadSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 template <> void comp_data_gpu<uchar4, float>(const DevMem2D& left, const DevMem2D& right, const DevMem2D& data, cudaStream_t stream)
 {
@@ -247,7 +247,7 @@ namespace cv { namespace gpu { namespace bp
     cudaSafeCall( cudaGetLastError() );
 
     if (stream == 0)
-        cudaSafeCall( cudaThreadSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 ///////////////////////////////////////////////////////////////
@@ -287,7 +287,7 @@ namespace cv { namespace gpu { namespace bp
     cudaSafeCall( cudaGetLastError() );
 
     if (stream == 0)
-        cudaSafeCall( cudaThreadSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 template void data_step_down_gpu<short>(int dst_cols, int dst_rows, int src_rows, const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
@@ -337,7 +337,7 @@ namespace cv { namespace gpu { namespace bp
     cudaSafeCall( cudaGetLastError() );
 
     if (stream == 0)
-        cudaSafeCall( cudaThreadSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 template void level_up_messages_gpu<short>(int dst_idx, int dst_cols, int dst_rows, int src_rows, DevMem2D* mus, DevMem2D* mds, DevMem2D* mls, DevMem2D* mrs, cudaStream_t stream);
@@ -457,7 +457,7 @@ namespace cv { namespace gpu { namespace bp
     cudaSafeCall( cudaGetLastError() );
 
     if (stream == 0)
-        cudaSafeCall( cudaThreadSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 }
 
@@ -520,7 +520,7 @@ namespace cv { namespace gpu { namespace bp
     cudaSafeCall( cudaGetLastError() );
 
     if (stream == 0)
-        cudaSafeCall( cudaThreadSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 template void output_gpu<short>(const DevMem2D& u, const DevMem2D& d, const DevMem2D& l, const DevMem2D& r, const DevMem2D& data, const DevMem2D_<short>& disp, cudaStream_t stream);
@@ -385,7 +385,7 @@ namespace cv { namespace gpu { namespace csbp
     cudaSafeCall( cudaGetLastError() );
 
     if (stream == 0)
-        cudaSafeCall( cudaThreadSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 
     dim3 threads(32, 8, 1);
     dim3 grid(1, 1, 1);
@@ -401,7 +401,7 @@ namespace cv { namespace gpu { namespace csbp
     cudaSafeCall( cudaGetLastError() );
 
     if (stream == 0)
-        cudaSafeCall( cudaThreadSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 template void init_data_cost(int rows, int cols, short* disp_selected_pyr, short* data_cost_selected, size_t msg_step,
@@ -586,7 +586,7 @@ namespace cv { namespace gpu { namespace csbp
     cudaSafeCall( cudaGetLastError() );
 
     if (stream == 0)
-        cudaSafeCall( cudaThreadSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 template void compute_data_cost(const short* disp_selected_pyr, short* data_cost, size_t msg_step1, size_t msg_step2,
@@ -713,7 +713,7 @@ namespace cv { namespace gpu { namespace csbp
     cudaSafeCall( cudaGetLastError() );
 
     if (stream == 0)
-        cudaSafeCall( cudaThreadSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 
@@ -815,7 +815,7 @@ namespace cv { namespace gpu { namespace csbp
     cudaSafeCall( cudaGetLastError() );
 
     if (stream == 0)
-        cudaSafeCall( cudaThreadSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 };
 
@@ -885,7 +885,7 @@ namespace cv { namespace gpu { namespace csbp
     cudaSafeCall( cudaGetLastError() );
 
     if (stream == 0)
-        cudaSafeCall( cudaThreadSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 template void compute_disp(const short* u, const short* d, const short* l, const short* r, const short* data_cost_selected, const short* disp_selected, size_t msg_step,
@@ -181,7 +181,7 @@ namespace cv { namespace gpu { namespace surf
     icvCalcLayerDetAndTrace<<<grid, threads>>>(det, trace);
     cudaSafeCall( cudaGetLastError() );
 
-    cudaSafeCall( cudaThreadSynchronize() );
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 ////////////////////////////////////////////////////////////////////////
@@ -338,7 +338,7 @@ namespace cv { namespace gpu { namespace surf
 
     cudaSafeCall( cudaGetLastError() );
 
-    cudaSafeCall( cudaThreadSynchronize() );
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 ////////////////////////////////////////////////////////////////////////
@@ -483,7 +483,7 @@ namespace cv { namespace gpu { namespace surf
     icvInterpolateKeypoint<<<grid, threads>>>(det, maxPosBuffer, featureX, featureY, featureLaplacian, featureSize, featureHessian, featureCounter);
     cudaSafeCall( cudaGetLastError() );
 
-    cudaSafeCall( cudaThreadSynchronize() );
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 ////////////////////////////////////////////////////////////////////////
@@ -674,7 +674,7 @@ namespace cv { namespace gpu { namespace surf
     icvCalcOrientation<<<grid, threads>>>(featureX, featureY, featureSize, featureDir);
     cudaSafeCall( cudaGetLastError() );
 
-    cudaSafeCall( cudaThreadSynchronize() );
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 ////////////////////////////////////////////////////////////////////////
@@ -986,24 +986,24 @@ namespace cv { namespace gpu { namespace surf
         compute_descriptors64<<<dim3(nFeatures, 16, 1), dim3(6, 6, 1)>>>(descriptors, featureX, featureY, featureSize, featureDir);
         cudaSafeCall( cudaGetLastError() );
 
-        cudaSafeCall( cudaThreadSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 
         normalize_descriptors<64><<<dim3(nFeatures, 1, 1), dim3(64, 1, 1)>>>(descriptors);
         cudaSafeCall( cudaGetLastError() );
 
-        cudaSafeCall( cudaThreadSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
     }
     else
     {
         compute_descriptors128<<<dim3(nFeatures, 16, 1), dim3(6, 6, 1)>>>(descriptors, featureX, featureY, featureSize, featureDir);
         cudaSafeCall( cudaGetLastError() );
 
-        cudaSafeCall( cudaThreadSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 
        normalize_descriptors<128><<<dim3(nFeatures, 1, 1), dim3(128, 1, 1)>>>(descriptors);
        cudaSafeCall( cudaGetLastError() );
 
-        cudaSafeCall( cudaThreadSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
     }
 }
 }}}
@@ -64,6 +64,8 @@ void cv::gpu::Stream::enqueueCopy(const GpuMat& /*src*/, GpuMat& /*dst*/) { thro
 void cv::gpu::Stream::enqueueMemSet(GpuMat& /*src*/, Scalar /*val*/) { throw_nogpu(); }
 void cv::gpu::Stream::enqueueMemSet(GpuMat& /*src*/, Scalar /*val*/, const GpuMat& /*mask*/) { throw_nogpu(); }
 void cv::gpu::Stream::enqueueConvert(const GpuMat& /*src*/, GpuMat& /*dst*/, int /*type*/, double /*a*/, double /*b*/) { throw_nogpu(); }
+Stream& cv::gpu::Stream::Null() { throw_nogpu(); static Stream s; return s; }
+cv::gpu::Stream::operator bool() const { throw_nogpu(); return false; }
 
 #else /* !defined (HAVE_CUDA) */
 
@@ -117,7 +119,7 @@ namespace
     }
 }
 
-CV_EXPORTS cudaStream_t cv::gpu::StreamAccessor::getStream(const Stream& stream) { return stream.impl->stream; };
+CV_EXPORTS cudaStream_t cv::gpu::StreamAccessor::getStream(const Stream& stream) { return stream.impl ? stream.impl->stream : 0; };
 
 void cv::gpu::Stream::create()
 {
@@ -188,18 +190,35 @@ void cv::gpu::Stream::enqueueUpload(const CudaMem& src, GpuMat& dst){ devcopy(sr
 void cv::gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst) { devcopy(src, dst, impl->stream, cudaMemcpyHostToDevice); }
 void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToDevice); }
 
-void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val)
+void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar s)
 {
     CV_Assert((src.depth() != CV_64F) ||
         (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
 
+    if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0)
+    {
+        cudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, impl->stream) );
+        return;
+    }
+    if (src.depth() == CV_8U)
+    {
+        int cn = src.channels();
+
+        if (cn == 1 || (cn == 2 && s[0] == s[1]) || (cn == 3 && s[0] == s[1] && s[0] == s[2]) || (cn == 4 && s[0] == s[1] && s[0] == s[2] && s[0] == s[3]))
+        {
+            int val = saturate_cast<uchar>(s[0]);
+            cudaSafeCall( cudaMemset2DAsync(src.data, src.step, val, src.cols * src.elemSize(), src.rows, impl->stream) );
+            return;
+        }
+    }
+
     typedef void (*set_caller_t)(GpuMat& src, const Scalar& s, cudaStream_t stream);
     static const set_caller_t set_callers[] =
     {
         kernelSet<uchar>, kernelSet<schar>, kernelSet<ushort>, kernelSet<short>,
         kernelSet<int>, kernelSet<float>, kernelSet<double>
     };
-    set_callers[src.depth()](src, val, impl->stream);
+    set_callers[src.depth()](src, s, impl->stream);
 }
 
 void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask)
@@ -246,5 +265,17 @@ void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype,
     matrix_operations::convert_gpu(psrc->reshape(1), sdepth, dst.reshape(1), ddepth, alpha, beta, impl->stream);
 }
 
+cv::gpu::Stream::operator bool() const
+{
+    return impl && impl->stream;
+}
+
+cv::gpu::Stream::Stream(Impl* impl_) : impl(impl_) {}
+
+cv::gpu::Stream& cv::gpu::Stream::Null()
+{
+    static Stream s((Impl*)0);
+    return s;
+}
 
 #endif /* !defined (HAVE_CUDA) */
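
Stream::Null() returns a Stream wrapping a null impl, which StreamAccessor::getStream() now maps to the default stream (0) instead of dereferencing a null pointer, and operator bool distinguishes it from a created stream. A hedged usage sketch against the cv::gpu API as extended here (assumes opencv2/gpu/gpu.hpp):

    cv::gpu::Stream& nullStream = cv::gpu::Stream::Null();
    CV_Assert( !nullStream );        // converts to false: no real CUDA stream behind it

    cv::gpu::Stream worker;
    worker.create();                 // allocates an asynchronous CUDA stream
    CV_Assert( worker );             // a created stream converts to true
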
|
|||||||
@@ -47,35 +47,26 @@ using namespace cv::gpu;
|
|||||||
|
|
||||||
#if !defined (HAVE_CUDA)
|
#if !defined (HAVE_CUDA)
|
||||||
|
|
||||||
void cv::gpu::add(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
|
void cv::gpu::add(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||||
void cv::gpu::add(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); }
|
void cv::gpu::add(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||||
void cv::gpu::subtract(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
|
void cv::gpu::subtract(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||||
void cv::gpu::subtract(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); }
|
void cv::gpu::subtract(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||||
void cv::gpu::multiply(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
|
void cv::gpu::multiply(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||||
void cv::gpu::multiply(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); }
|
void cv::gpu::multiply(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||||
void cv::gpu::divide(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
|
void cv::gpu::divide(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||||
void cv::gpu::divide(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); }
|
void cv::gpu::divide(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||||
void cv::gpu::absdiff(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
|
void cv::gpu::absdiff(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||||
void cv::gpu::absdiff(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); }
|
void cv::gpu::absdiff(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||||
void cv::gpu::compare(const GpuMat&, const GpuMat&, GpuMat&, int) { throw_nogpu(); }
|
void cv::gpu::compare(const GpuMat&, const GpuMat&, GpuMat&, int, Stream&) { throw_nogpu(); }
|
||||||
void cv::gpu::bitwise_not(const GpuMat&, GpuMat&, const GpuMat&) { throw_nogpu(); }
|
void cv::gpu::bitwise_not(const GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_nogpu(); }
|
||||||
void cv::gpu::bitwise_not(const GpuMat&, GpuMat&, const GpuMat&, const Stream&) { throw_nogpu(); }
|
void cv::gpu::bitwise_or(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_nogpu(); }
|
||||||
void cv::gpu::bitwise_or(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&) { throw_nogpu(); }
|
void cv::gpu::bitwise_and(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_nogpu(); }
|
||||||
void cv::gpu::bitwise_or(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, const Stream&) { throw_nogpu(); }
|
void cv::gpu::bitwise_xor(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_nogpu(); }
|
||||||
void cv::gpu::bitwise_and(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&) { throw_nogpu(); }
|
void cv::gpu::min(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||||
void cv::gpu::bitwise_and(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, const Stream&) { throw_nogpu(); }
|
void cv::gpu::min(const GpuMat&, double, GpuMat&, Stream&) { throw_nogpu(); }
|
||||||
void cv::gpu::bitwise_xor(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&) { throw_nogpu(); }
|
void cv::gpu::max(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||||
void cv::gpu::bitwise_xor(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, const Stream&) { throw_nogpu(); }
|
void cv::gpu::max(const GpuMat&, double, GpuMat&, Stream&) { throw_nogpu(); }
|
||||||
void cv::gpu::min(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
|
double cv::gpu::threshold(const GpuMat&, GpuMat&, double, double, int, Stream&) {throw_nogpu(); return 0.0;}
|
||||||
void cv::gpu::min(const GpuMat&, const GpuMat&, GpuMat&, const Stream&) { throw_nogpu(); }
|
|
||||||
void cv::gpu::min(const GpuMat&, double, GpuMat&) { throw_nogpu(); }
|
|
||||||
void cv::gpu::min(const GpuMat&, double, GpuMat&, const Stream&) { throw_nogpu(); }
|
|
||||||
void cv::gpu::max(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
|
|
||||||
void cv::gpu::max(const GpuMat&, const GpuMat&, GpuMat&, const Stream&) { throw_nogpu(); }
|
|
||||||
void cv::gpu::max(const GpuMat&, double, GpuMat&) { throw_nogpu(); }
|
|
||||||
void cv::gpu::max(const GpuMat&, double, GpuMat&, const Stream&) { throw_nogpu(); }
|
|
||||||
double cv::gpu::threshold(const GpuMat&, GpuMat&, double, double, int) {throw_nogpu(); return 0.0;}
|
|
||||||
double cv::gpu::threshold(const GpuMat&, GpuMat&, double, double, int, const Stream&) {throw_nogpu(); return 0.0;}
|
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
@@ -90,7 +81,7 @@ namespace
|
|||||||
|
|
||||||
void nppArithmCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst,
|
void nppArithmCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst,
|
||||||
npp_arithm_8u_t npp_func_8uc1, npp_arithm_8u_t npp_func_8uc4,
|
npp_arithm_8u_t npp_func_8uc1, npp_arithm_8u_t npp_func_8uc4,
|
||||||
npp_arithm_32s_t npp_func_32sc1, npp_arithm_32f_t npp_func_32fc1)
|
npp_arithm_32s_t npp_func_32sc1, npp_arithm_32f_t npp_func_32fc1, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());
|
CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());
|
||||||
CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32SC1 || src1.type() == CV_32FC1);
|
CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32SC1 || src1.type() == CV_32FC1);
|
||||||
@@ -100,6 +91,8 @@ namespace
|
|||||||
sz.width = src1.cols;
|
sz.width = src1.cols;
|
||||||
sz.height = src1.rows;
|
sz.height = src1.rows;
|
||||||
|
|
||||||
|
NppStreamHandler h(stream);
|
||||||
|
|
||||||
switch (src1.type())
|
switch (src1.type())
|
||||||
{
|
{
|
||||||
case CV_8UC1:
|
case CV_8UC1:
|
||||||
@@ -118,7 +111,8 @@ namespace
|
|||||||
CV_Assert(!"Unsupported source type");
|
CV_Assert(!"Unsupported source type");
|
||||||
}
|
}
|
||||||
|
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
if (stream == 0)
|
||||||
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
template<int SCN> struct NppArithmScalarFunc;
|
template<int SCN> struct NppArithmScalarFunc;
|
||||||
@@ -135,7 +129,7 @@ namespace
|
|||||||
|
|
||||||
template<typename NppArithmScalarFunc<1>::func_ptr func> struct NppArithmScalar<1, func>
|
template<typename NppArithmScalarFunc<1>::func_ptr func> struct NppArithmScalar<1, func>
|
||||||
{
|
{
|
||||||
static void calc(const GpuMat& src, const Scalar& sc, GpuMat& dst)
|
static void calc(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dst.create(src.size(), src.type());
|
dst.create(src.size(), src.type());
|
||||||
|
|
||||||
@@ -143,14 +137,17 @@ namespace
|
|||||||
sz.width = src.cols;
|
sz.width = src.cols;
|
||||||
sz.height = src.rows;
|
sz.height = src.rows;
|
||||||
|
|
||||||
|
NppStreamHandler h(stream);
|
||||||
|
|
||||||
nppSafeCall( func(src.ptr<Npp32f>(), src.step, (Npp32f)sc[0], dst.ptr<Npp32f>(), dst.step, sz) );
|
nppSafeCall( func(src.ptr<Npp32f>(), src.step, (Npp32f)sc[0], dst.ptr<Npp32f>(), dst.step, sz) );
|
||||||
|
|
||||||
cudaSafeCall( cudaThreadSynchronize() );
|
if (stream == 0)
|
||||||
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
template<typename NppArithmScalarFunc<2>::func_ptr func> struct NppArithmScalar<2, func>
|
template<typename NppArithmScalarFunc<2>::func_ptr func> struct NppArithmScalar<2, func>
|
||||||
{
|
{
|
||||||
static void calc(const GpuMat& src, const Scalar& sc, GpuMat& dst)
|
static void calc(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dst.create(src.size(), src.type());
|
dst.create(src.size(), src.type());
|
||||||
|
|
||||||
@@ -162,78 +159,81 @@ namespace
             nValue.re = (Npp32f)sc[0];
             nValue.im = (Npp32f)sc[1];
 
+            NppStreamHandler h(stream);
+
             nppSafeCall( func(src.ptr<Npp32fc>(), src.step, nValue, dst.ptr<Npp32fc>(), dst.step, sz) );
 
-            cudaSafeCall( cudaThreadSynchronize() );
+            if (stream == 0)
+                cudaSafeCall( cudaDeviceSynchronize() );
         }
     };
 }
 
-void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
+void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
 {
-    nppArithmCaller(src1, src2, dst, nppiAdd_8u_C1RSfs, nppiAdd_8u_C4RSfs, nppiAdd_32s_C1R, nppiAdd_32f_C1R);
+    nppArithmCaller(src1, src2, dst, nppiAdd_8u_C1RSfs, nppiAdd_8u_C4RSfs, nppiAdd_32s_C1R, nppiAdd_32f_C1R, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
+void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
 {
-    nppArithmCaller(src2, src1, dst, nppiSub_8u_C1RSfs, nppiSub_8u_C4RSfs, nppiSub_32s_C1R, nppiSub_32f_C1R);
+    nppArithmCaller(src2, src1, dst, nppiSub_8u_C1RSfs, nppiSub_8u_C4RSfs, nppiSub_32s_C1R, nppiSub_32f_C1R, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
+void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
 {
-    nppArithmCaller(src1, src2, dst, nppiMul_8u_C1RSfs, nppiMul_8u_C4RSfs, nppiMul_32s_C1R, nppiMul_32f_C1R);
+    nppArithmCaller(src1, src2, dst, nppiMul_8u_C1RSfs, nppiMul_8u_C4RSfs, nppiMul_32s_C1R, nppiMul_32f_C1R, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
+void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
 {
-    nppArithmCaller(src2, src1, dst, nppiDiv_8u_C1RSfs, nppiDiv_8u_C4RSfs, nppiDiv_32s_C1R, nppiDiv_32f_C1R);
+    nppArithmCaller(src2, src1, dst, nppiDiv_8u_C1RSfs, nppiDiv_8u_C4RSfs, nppiDiv_32s_C1R, nppiDiv_32f_C1R, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst)
+void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream)
 {
-    typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst);
+    typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream);
     static const caller_t callers[] = {0, NppArithmScalar<1, nppiAddC_32f_C1R>::calc, NppArithmScalar<2, nppiAddC_32fc_C1R>::calc};
 
     CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2);
 
-    callers[src.channels()](src, sc, dst);
+    callers[src.channels()](src, sc, dst, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst)
+void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream)
 {
-    typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst);
+    typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream);
     static const caller_t callers[] = {0, NppArithmScalar<1, nppiSubC_32f_C1R>::calc, NppArithmScalar<2, nppiSubC_32fc_C1R>::calc};
 
     CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2);
 
-    callers[src.channels()](src, sc, dst);
+    callers[src.channels()](src, sc, dst, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst)
+void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream)
 {
-    typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst);
+    typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream);
     static const caller_t callers[] = {0, NppArithmScalar<1, nppiMulC_32f_C1R>::calc, NppArithmScalar<2, nppiMulC_32fc_C1R>::calc};
 
     CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2);
 
-    callers[src.channels()](src, sc, dst);
+    callers[src.channels()](src, sc, dst, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst)
+void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream)
 {
-    typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst);
+    typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream);
     static const caller_t callers[] = {0, NppArithmScalar<1, nppiDivC_32f_C1R>::calc, NppArithmScalar<2, nppiDivC_32fc_C1R>::calc};
 
     CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2);
 
-    callers[src.channels()](src, sc, dst);
+    callers[src.channels()](src, sc, dst, StreamAccessor::getStream(stream));
 }
 
 
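Caller side, the merged signatures above behave as before when no stream is passed and become asynchronous when one is. A usage sketch, assuming the matching header gives the Stream parameter a Stream::Null() default:

#include <opencv2/gpu/gpu.hpp>

void sumAsync(const cv::gpu::GpuMat& a, const cv::gpu::GpuMat& b,
              cv::gpu::GpuMat& dst, cv::gpu::Stream& stream)
{
    cv::gpu::add(a, b, dst, stream);   // enqueued on `stream`, returns immediately
}

void sumBlocking(const cv::gpu::GpuMat& a, const cv::gpu::GpuMat& b, cv::gpu::GpuMat& dst)
{
    // Stream::Null() maps to CUDA stream 0, so the function synchronizes
    // internally with cudaDeviceSynchronize() before returning.
    cv::gpu::add(a, b, dst, cv::gpu::Stream::Null());
}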
 //////////////////////////////////////////////////////////////////////////////
 // Absolute difference
 
-void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
+void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s)
 {
     CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());
 
@@ -245,6 +245,10 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
     sz.width = src1.cols;
     sz.height = src1.rows;
 
+    cudaStream_t stream = StreamAccessor::getStream(s);
+
+    NppStreamHandler h(stream);
+
     switch (src1.type())
     {
     case CV_8UC1:
@@ -263,22 +267,28 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
         CV_Assert(!"Unsupported source type");
     }
 
-    cudaSafeCall( cudaThreadSynchronize() );
+    if (stream == 0)
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 
-void cv::gpu::absdiff(const GpuMat& src, const Scalar& s, GpuMat& dst)
+void cv::gpu::absdiff(const GpuMat& src1, const Scalar& src2, GpuMat& dst, Stream& s)
 {
-    CV_Assert(src.type() == CV_32FC1);
+    CV_Assert(src1.type() == CV_32FC1);
 
-    dst.create( src.size(), src.type() );
+    dst.create( src1.size(), src1.type() );
 
     NppiSize sz;
-    sz.width = src.cols;
-    sz.height = src.rows;
+    sz.width = src1.cols;
+    sz.height = src1.rows;
 
-    nppSafeCall( nppiAbsDiffC_32f_C1R(src.ptr<Npp32f>(), src.step, dst.ptr<Npp32f>(), dst.step, sz, (Npp32f)s[0]) );
+    cudaStream_t stream = StreamAccessor::getStream(s);
 
-    cudaSafeCall( cudaThreadSynchronize() );
+    NppStreamHandler h(stream);
+
+    nppSafeCall( nppiAbsDiffC_32f_C1R(src1.ptr<Npp32f>(), src1.step, dst.ptr<Npp32f>(), dst.step, sz, (Npp32f)src2[0]) );
+
+    if (stream == 0)
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 
@@ -287,11 +297,11 @@ void cv::gpu::absdiff(const GpuMat& src, const Scalar& s, GpuMat& dst)
 
 namespace cv { namespace gpu { namespace mathfunc
 {
-    void compare_ne_8uc4(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst);
-    void compare_ne_32f(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst);
+    void compare_ne_8uc4(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst, cudaStream_t stream);
+    void compare_ne_32f(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst, cudaStream_t stream);
 }}}
 
-void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int cmpop)
+void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int cmpop, Stream& s)
 {
     CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());
 
@@ -305,34 +315,42 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
     sz.width = src1.cols;
     sz.height = src1.rows;
 
+    cudaStream_t stream = StreamAccessor::getStream(s);
+
     if (src1.type() == CV_8UC4)
     {
         if (cmpop != CMP_NE)
         {
+            NppStreamHandler h(stream);
+
             nppSafeCall( nppiCompare_8u_C4R(src1.ptr<Npp8u>(), src1.step,
                 src2.ptr<Npp8u>(), src2.step,
                 dst.ptr<Npp8u>(), dst.step, sz, nppCmpOp[cmpop]) );
 
-            cudaSafeCall( cudaThreadSynchronize() );
+            if (stream == 0)
+                cudaSafeCall( cudaDeviceSynchronize() );
         }
         else
        {
-            mathfunc::compare_ne_8uc4(src1, src2, dst);
+            mathfunc::compare_ne_8uc4(src1, src2, dst, stream);
        }
    }
    else
    {
        if (cmpop != CMP_NE)
        {
+            NppStreamHandler h(stream);
+
            nppSafeCall( nppiCompare_32f_C1R(src1.ptr<Npp32f>(), src1.step,
                src2.ptr<Npp32f>(), src2.step,
                dst.ptr<Npp8u>(), dst.step, sz, nppCmpOp[cmpop]) );
 
-            cudaSafeCall( cudaThreadSynchronize() );
+            if (stream == 0)
+                cudaSafeCall( cudaDeviceSynchronize() );
        }
        else
        {
-            mathfunc::compare_ne_32f(src1, src2, dst);
+            mathfunc::compare_ne_32f(src1, src2, dst, stream);
        }
    }
 }
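Note that compare() routes the CMP_NE case to hand-written CUDA kernels (compare_ne_8uc4, compare_ne_32f) rather than NPP, so those helpers receive the raw cudaStream_t themselves. A sketch of the launch pattern such a helper plausibly follows; the kernel below is illustrative, not the module's actual .cu implementation:

#include <cuda_runtime.h>

__global__ void compare_ne_32f_sketch(const float* src1, size_t step1,
                                      const float* src2, size_t step2,
                                      unsigned char* dst, size_t dstStep,
                                      int cols, int rows)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    if (x < cols && y < rows)
    {
        // Pitched accesses: steps are in bytes.
        const float* row1 = (const float*)((const char*)src1 + y * step1);
        const float* row2 = (const float*)((const char*)src2 + y * step2);
        dst[y * dstStep + x] = (row1[x] != row2[x]) ? 255 : 0;
    }
}

void launch_compare_ne_32f(const float* src1, size_t step1, const float* src2, size_t step2,
                           unsigned char* dst, size_t dstStep, int cols, int rows,
                           cudaStream_t stream)
{
    const dim3 block(16, 16);
    const dim3 grid((cols + block.x - 1) / block.x, (rows + block.y - 1) / block.y);

    // The fourth launch parameter selects the stream; shared memory size is 0.
    compare_ne_32f_sketch<<<grid, block, 0, stream>>>(src1, step1, src2, step2,
                                                      dst, dstStep, cols, rows);
    if (stream == 0)
        cudaDeviceSynchronize();   // mirror the host-side contract for stream 0
}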
@@ -383,16 +401,7 @@ namespace
     }
 
 
-void cv::gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask)
-{
-    if (mask.empty())
-        ::bitwiseNotCaller(src, dst, 0);
-    else
-        ::bitwiseNotCaller(src, dst, mask, 0);
-}
-
-
-void cv::gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask, const Stream& stream)
+void cv::gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask, Stream& stream)
 {
     if (mask.empty())
         ::bitwiseNotCaller(src, dst, StreamAccessor::getStream(stream));
@@ -519,16 +528,7 @@ namespace
 }
 
 
-void cv::gpu::bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask)
-{
-    if (mask.empty())
-        ::bitwiseOrCaller(src1, src2, dst, 0);
-    else
-        ::bitwiseOrCaller(src1, src2, dst, mask, 0);
-}
-
-
-void cv::gpu::bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, const Stream& stream)
+void cv::gpu::bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& stream)
 {
     if (mask.empty())
         ::bitwiseOrCaller(src1, src2, dst, StreamAccessor::getStream(stream));
@@ -537,16 +537,7 @@ void cv::gpu::bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, co
 }
 
 
-void cv::gpu::bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask)
-{
-    if (mask.empty())
-        ::bitwiseAndCaller(src1, src2, dst, 0);
-    else
-        ::bitwiseAndCaller(src1, src2, dst, mask, 0);
-}
-
-
-void cv::gpu::bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, const Stream& stream)
+void cv::gpu::bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& stream)
 {
     if (mask.empty())
         ::bitwiseAndCaller(src1, src2, dst, StreamAccessor::getStream(stream));
@@ -555,16 +546,7 @@ void cv::gpu::bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, c
 }
 
 
-void cv::gpu::bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask)
-{
-    if (mask.empty())
-        ::bitwiseXorCaller(src1, src2, dst, 0);
-    else
-        ::bitwiseXorCaller(src1, src2, dst, mask, 0);
-}
-
-
-void cv::gpu::bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, const Stream& stream)
+void cv::gpu::bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& stream)
 {
     if (mask.empty())
         ::bitwiseXorCaller(src1, src2, dst, StreamAccessor::getStream(stream));
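The min/max/threshold hunks that follow all collapse a duplicated pair of overloads into one Stream-taking function built around a per-depth dispatch table. The shape of that table, sketched with hypothetical names (scaleBy2 and scale_caller are illustrative, not OpenCV functions) and assuming the stream_accessor.hpp header used by these files:

#include <opencv2/gpu/gpu.hpp>
#include <opencv2/gpu/stream_accessor.hpp>

using namespace cv::gpu;

typedef void (*scale_func_t)(const GpuMat& src, GpuMat& dst, cudaStream_t stream);

template <typename T>
static void scale_caller(const GpuMat& src, GpuMat& dst, cudaStream_t stream)
{
    // A real caller would enqueue a CUDA kernel for element type T on `stream`.
    (void)src; (void)dst; (void)stream;
}

void scaleBy2(const GpuMat& src, GpuMat& dst, Stream& stream)
{
    static const scale_func_t funcs[] =
    {
        scale_caller<unsigned char>, scale_caller<signed char>,
        scale_caller<unsigned short>, scale_caller<short>,
        scale_caller<int>, scale_caller<float>, scale_caller<double>
    };

    // src.depth() indexes CV_8U..CV_64F; the raw stream crosses the API
    // boundary exactly once, via StreamAccessor.
    funcs[src.depth()](src, dst, StreamAccessor::getStream(stream));
}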
@@ -624,22 +606,7 @@ namespace
     }
 }
 
-void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
-{
-    CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
-    CV_Assert((src1.depth() != CV_64F) ||
-        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
-
-    typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream);
-    static const func_t funcs[] =
-    {
-        min_caller<uchar>, min_caller<schar>, min_caller<ushort>, min_caller<short>, min_caller<int>,
-        min_caller<float>, min_caller<double>
-    };
-    funcs[src1.depth()](src1, src2, dst, 0);
-}
-
-void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Stream& stream)
+void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
 {
     CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
     CV_Assert((src1.depth() != CV_64F) ||
@@ -653,22 +620,7 @@ void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Str
     };
     funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::min(const GpuMat& src1, double src2, GpuMat& dst)
-{
-    CV_Assert((src1.depth() != CV_64F) ||
-        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
-
-    typedef void (*func_t)(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream);
-    static const func_t funcs[] =
-    {
-        min_caller<uchar>, min_caller<schar>, min_caller<ushort>, min_caller<short>, min_caller<int>,
-        min_caller<float>, min_caller<double>
-    };
-    funcs[src1.depth()](src1, src2, dst, 0);
-}
-
-void cv::gpu::min(const GpuMat& src1, double src2, GpuMat& dst, const Stream& stream)
+void cv::gpu::min(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream)
 {
     CV_Assert((src1.depth() != CV_64F) ||
         (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
@@ -682,22 +634,7 @@ void cv::gpu::min(const GpuMat& src1, double src2, GpuMat& dst, const Stream& st
     funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
-{
-    CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
-    CV_Assert((src1.depth() != CV_64F) ||
-        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
-
-    typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream);
-    static const func_t funcs[] =
-    {
-        max_caller<uchar>, max_caller<schar>, max_caller<ushort>, max_caller<short>, max_caller<int>,
-        max_caller<float>, max_caller<double>
-    };
-    funcs[src1.depth()](src1, src2, dst, 0);
-}
-
-void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Stream& stream)
+void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
 {
     CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
     CV_Assert((src1.depth() != CV_64F) ||
@@ -712,21 +649,7 @@ void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Str
     funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::max(const GpuMat& src1, double src2, GpuMat& dst)
-{
-    CV_Assert((src1.depth() != CV_64F) ||
-        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
-
-    typedef void (*func_t)(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream);
-    static const func_t funcs[] =
-    {
-        max_caller<uchar>, max_caller<schar>, max_caller<ushort>, max_caller<short>, max_caller<int>,
-        max_caller<float>, max_caller<double>
-    };
-    funcs[src1.depth()](src1, src2, dst, 0);
-}
-
-void cv::gpu::max(const GpuMat& src1, double src2, GpuMat& dst, const Stream& stream)
+void cv::gpu::max(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream)
 {
     CV_Assert((src1.depth() != CV_64F) ||
         (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
@@ -760,10 +683,14 @@ namespace
     }
 }
 
-double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxVal, int type)
+double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxVal, int type, Stream& s)
 {
+    cudaStream_t stream = StreamAccessor::getStream(s);
+
     if (src.type() == CV_32FC1 && type == THRESH_TRUNC)
     {
+        NppStreamHandler h(stream);
+
         dst.create(src.size(), src.type());
 
         NppiSize sz;
@@ -773,7 +700,8 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double
         nppSafeCall( nppiThreshold_32f_C1R(src.ptr<Npp32f>(), src.step,
             dst.ptr<Npp32f>(), dst.step, sz, static_cast<Npp32f>(thresh), NPP_CMP_GREATER) );
 
-        cudaSafeCall( cudaThreadSynchronize() );
+        if (stream == 0)
+            cudaSafeCall( cudaDeviceSynchronize() );
     }
     else
     {
@@ -801,37 +729,10 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double
             maxVal = cvRound(maxVal);
         }
 
-        callers[src.depth()](src, dst, thresh, maxVal, type, 0);
+        callers[src.depth()](src, dst, thresh, maxVal, type, stream);
     }
 
     return thresh;
 }
 
-double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxVal, int type, const Stream& stream)
-{
-    typedef void (*caller_t)(const GpuMat& src, GpuMat& dst, double thresh, double maxVal, int type,
-        cudaStream_t stream);
-
-    static const caller_t callers[] =
-    {
-        threshold_caller<unsigned char>, threshold_caller<signed char>,
-        threshold_caller<unsigned short>, threshold_caller<short>,
-        threshold_caller<int>, threshold_caller<float>, threshold_caller<double>
-    };
-
-    CV_Assert(src.channels() == 1 && src.depth() <= CV_64F);
-    CV_Assert(type <= THRESH_TOZERO_INV);
-
-    dst.create(src.size(), src.type());
-
-    if (src.depth() != CV_32F)
-    {
-        thresh = cvFloor(thresh);
-        maxVal = cvRound(maxVal);
-    }
-
-    callers[src.depth()](src, dst, thresh, maxVal, type, StreamAccessor::getStream(stream));
-
-    return thresh;
-}
-
 #endif
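Taken together, the element-wise functions now support fully asynchronous pipelines. A caller-side sketch chaining uploads, arithmetic, threshold and a download on one stream, with a single synchronization point (for truly overlapped copies the host buffers should be page-locked):

#include <opencv2/gpu/gpu.hpp>

void pipeline(const cv::Mat& hostA, const cv::Mat& hostB, cv::Mat& hostOut)
{
    cv::gpu::Stream stream;
    cv::gpu::GpuMat a, b, sum, binary;

    stream.enqueueUpload(hostA, a);
    stream.enqueueUpload(hostB, b);

    cv::gpu::add(a, b, sum, stream);                                          // no implicit sync
    cv::gpu::threshold(sum, binary, 128.0, 255.0, cv::THRESH_BINARY, stream);

    stream.enqueueDownload(binary, hostOut);
    stream.waitForCompletion();   // the only blocking call in the pipeline
}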
@@ -66,16 +66,16 @@ Ptr<FilterEngine_GPU> cv::gpu::createGaussianFilter_GPU(int, Size, double, doubl
 Ptr<BaseFilter_GPU> cv::gpu::getMaxFilter_GPU(int, int, const Size&, Point) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); }
 Ptr<BaseFilter_GPU> cv::gpu::getMinFilter_GPU(int, int, const Size&, Point) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); }
 
-void cv::gpu::boxFilter(const GpuMat&, GpuMat&, int, Size, Point) { throw_nogpu(); }
-void cv::gpu::erode( const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_nogpu(); }
-void cv::gpu::dilate( const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_nogpu(); }
-void cv::gpu::morphologyEx( const GpuMat&, GpuMat&, int, const Mat&, Point, int) { throw_nogpu(); }
-void cv::gpu::filter2D(const GpuMat&, GpuMat&, int, const Mat&, Point) { throw_nogpu(); }
-void cv::gpu::sepFilter2D(const GpuMat&, GpuMat&, int, const Mat&, const Mat&, Point, int, int) { throw_nogpu(); }
-void cv::gpu::Sobel(const GpuMat&, GpuMat&, int, int, int, int, double, int, int) { throw_nogpu(); }
-void cv::gpu::Scharr(const GpuMat&, GpuMat&, int, int, int, double, int, int) { throw_nogpu(); }
-void cv::gpu::GaussianBlur(const GpuMat&, GpuMat&, Size, double, double, int, int) { throw_nogpu(); }
-void cv::gpu::Laplacian(const GpuMat&, GpuMat&, int, int, double) { throw_nogpu(); }
+void cv::gpu::boxFilter(const GpuMat&, GpuMat&, int, Size, Point, Stream&) { throw_nogpu(); }
+void cv::gpu::erode( const GpuMat&, GpuMat&, const Mat&, Point, int, Stream&) { throw_nogpu(); }
+void cv::gpu::dilate( const GpuMat&, GpuMat&, const Mat&, Point, int, Stream&) { throw_nogpu(); }
+void cv::gpu::morphologyEx( const GpuMat&, GpuMat&, int, const Mat&, Point, int, Stream&) { throw_nogpu(); }
+void cv::gpu::filter2D(const GpuMat&, GpuMat&, int, const Mat&, Point, Stream&) { throw_nogpu(); }
+void cv::gpu::sepFilter2D(const GpuMat&, GpuMat&, int, const Mat&, const Mat&, Point, int, int, Stream&) { throw_nogpu(); }
+void cv::gpu::Sobel(const GpuMat&, GpuMat&, int, int, int, int, double, int, int, Stream&) { throw_nogpu(); }
+void cv::gpu::Scharr(const GpuMat&, GpuMat&, int, int, int, double, int, int, Stream&) { throw_nogpu(); }
+void cv::gpu::GaussianBlur(const GpuMat&, GpuMat&, Size, double, double, int, int, Stream&) { throw_nogpu(); }
+void cv::gpu::Laplacian(const GpuMat&, GpuMat&, int, int, double, Stream&) { throw_nogpu(); }
 
 #else
 
@@ -137,21 +137,25 @@ namespace
             filter2D(filter2D_), srcType(srcType_), dstType(dstType_)
         {}
 
-        virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1))
+        virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1), Stream& stream = Stream::Null())
         {
            CV_Assert(src.type() == srcType);
 
            Size src_size = src.size();
 
            dst.create(src_size, dstType);
-            dst = Scalar(0.0);
+
+            if (stream)
+                stream.enqueueMemSet(dst, Scalar::all(0.0));
+            else
+                dst.setTo(Scalar::all(0.0));
 
            normalizeROI(roi, filter2D->ksize, filter2D->anchor, src_size);
 
            GpuMat srcROI = src(roi);
            GpuMat dstROI = dst(roi);
 
-            (*filter2D)(srcROI, dstROI);
+            (*filter2D)(srcROI, dstROI, stream);
        }
 
        Ptr<BaseFilter_GPU> filter2D;
@@ -181,16 +185,25 @@ namespace
            anchor = Point(rowFilter->anchor, columnFilter->anchor);
        }
 
-        virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1))
+        virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1), Stream& stream = Stream::Null())
        {
            CV_Assert(src.type() == srcType);
 
            Size src_size = src.size();
 
            dst.create(src_size, dstType);
-            dst = Scalar(0.0);
            dstBuf.create(src_size, bufType);
-            dstBuf = Scalar(0.0);
+
+            if (stream)
+            {
+                stream.enqueueMemSet(dst, Scalar::all(0));
+                stream.enqueueMemSet(dstBuf, Scalar::all(0));
+            }
+            else
+            {
+                dst = Scalar(0.0);
+                dstBuf = Scalar(0.0);
+            }
 
            normalizeROI(roi, ksize, anchor, src_size);
 
@@ -198,8 +211,8 @@ namespace
            GpuMat dstROI = dst(roi);
            GpuMat dstBufROI = dstBuf(roi);
 
-            (*rowFilter)(srcROI, dstBufROI);
-            (*columnFilter)(dstBufROI, dstROI);
+            (*rowFilter)(srcROI, dstBufROI, stream);
+            (*columnFilter)(dstBufROI, dstROI, stream);
        }
 
        Ptr<BaseRowFilter_GPU> rowFilter;
@@ -229,15 +242,20 @@ namespace
    public:
        NppRowSumFilter(int ksize_, int anchor_) : BaseRowFilter_GPU(ksize_, anchor_) {}
 
-        virtual void operator()(const GpuMat& src, GpuMat& dst)
+        virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
        {
            NppiSize sz;
            sz.width = src.cols;
            sz.height = src.rows;
 
+            cudaStream_t stream = StreamAccessor::getStream(s);
+
+            NppStreamHandler h(stream);
+
            nppSafeCall( nppiSumWindowRow_8u32f_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp32f>(), dst.step, sz, ksize, anchor) );
 
-            cudaSafeCall( cudaThreadSynchronize() );
+            if (stream == 0)
+                cudaSafeCall( cudaDeviceSynchronize() );
        }
    };
 }
@@ -258,15 +276,20 @@ namespace
    public:
        NppColumnSumFilter(int ksize_, int anchor_) : BaseColumnFilter_GPU(ksize_, anchor_) {}
 
-        virtual void operator()(const GpuMat& src, GpuMat& dst)
+        virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
        {
            NppiSize sz;
            sz.width = src.cols;
            sz.height = src.rows;
 
+            cudaStream_t stream = StreamAccessor::getStream(s);
+
+            NppStreamHandler h(stream);
+
            nppSafeCall( nppiSumWindowColumn_8u32f_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp32f>(), dst.step, sz, ksize, anchor) );
 
-            cudaSafeCall( cudaThreadSynchronize() );
+            if (stream == 0)
+                cudaSafeCall( cudaDeviceSynchronize() );
        }
    };
 }
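Because apply() now threads the stream down to the underlying row and column filters, a single engine can be reused across many frames without intermediate synchronization. A usage sketch, assuming createBoxFilter_GPU keeps the signature shown in this file:

#include <vector>
#include <opencv2/gpu/gpu.hpp>

void blurSequence(const std::vector<cv::gpu::GpuMat>& frames,
                  std::vector<cv::gpu::GpuMat>& blurred,
                  cv::gpu::Stream& stream)
{
    // Engine construction stays synchronous; only apply() is enqueued.
    cv::Ptr<cv::gpu::FilterEngine_GPU> box =
        cv::gpu::createBoxFilter_GPU(CV_8UC1, CV_8UC1, cv::Size(5, 5), cv::Point(-1, -1));

    blurred.resize(frames.size());
    for (size_t i = 0; i < frames.size(); ++i)
        box->apply(frames[i], blurred[i], cv::Rect(0, 0, -1, -1), stream);
}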
@@ -293,7 +316,7 @@ namespace
    public:
        NPPBoxFilter(const Size& ksize_, const Point& anchor_, nppFilterBox_t func_) : BaseFilter_GPU(ksize_, anchor_), func(func_) {}
 
-        virtual void operator()(const GpuMat& src, GpuMat& dst)
+        virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
        {
            NppiSize sz;
            sz.width = src.cols;
@@ -305,9 +328,14 @@ namespace
            oAnchor.x = anchor.x;
            oAnchor.y = anchor.y;
 
+            cudaStream_t stream = StreamAccessor::getStream(s);
+
+            NppStreamHandler h(stream);
+
            nppSafeCall( func(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, oKernelSize, oAnchor) );
 
-            cudaSafeCall( cudaThreadSynchronize() );
+            if (stream == 0)
+                cudaSafeCall( cudaDeviceSynchronize() );
        }
 
        nppFilterBox_t func;
@@ -331,7 +359,7 @@ Ptr<FilterEngine_GPU> cv::gpu::createBoxFilter_GPU(int srcType, int dstType, con
    return createFilter2D_GPU(boxFilter, srcType, dstType);
 }
 
-void cv::gpu::boxFilter(const GpuMat& src, GpuMat& dst, int ddepth, Size ksize, Point anchor)
+void cv::gpu::boxFilter(const GpuMat& src, GpuMat& dst, int ddepth, Size ksize, Point anchor, Stream& stream)
 {
    int sdepth = src.depth(), cn = src.channels();
    if( ddepth < 0 )
@@ -340,7 +368,7 @@ void cv::gpu::boxFilter(const GpuMat& src, GpuMat& dst, int ddepth, Size ksize,
    dst.create(src.size(), CV_MAKETYPE(ddepth, cn));
 
    Ptr<FilterEngine_GPU> f = createBoxFilter_GPU(src.type(), dst.type(), ksize, anchor);
-    f->apply(src, dst);
+    f->apply(src, dst, Rect(0,0,-1,-1), stream);
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -356,7 +384,7 @@ namespace
        NPPMorphFilter(const Size& ksize_, const Point& anchor_, const GpuMat& kernel_, nppMorfFilter_t func_) :
            BaseFilter_GPU(ksize_, anchor_), kernel(kernel_), func(func_) {}
 
-        virtual void operator()(const GpuMat& src, GpuMat& dst)
+        virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
        {
            NppiSize sz;
            sz.width = src.cols;
@@ -368,9 +396,14 @@ namespace
            oAnchor.x = anchor.x;
            oAnchor.y = anchor.y;
 
+            cudaStream_t stream = StreamAccessor::getStream(s);
+
+            NppStreamHandler h(stream);
+
            nppSafeCall( func(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, kernel.ptr<Npp8u>(), oKernelSize, oAnchor) );
 
-            cudaSafeCall( cudaThreadSynchronize() );
+            if (stream == 0)
+                cudaSafeCall( cudaDeviceSynchronize() );
        }
 
        GpuMat kernel;
@@ -404,16 +437,16 @@ namespace
        MorphologyFilterEngine_GPU(const Ptr<BaseFilter_GPU>& filter2D_, int type, int iters_) :
            Filter2DEngine_GPU(filter2D_, type, type), iters(iters_) {}
 
-        virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1))
+        virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1), Stream& stream = Stream::Null())
        {
            if (iters > 1)
                morfBuf.create(src.size(), src.type());
 
-            Filter2DEngine_GPU::apply(src, dst);
+            Filter2DEngine_GPU::apply(src, dst, roi, stream);
            for(int i = 1; i < iters; ++i)
            {
                dst.swap(morfBuf);
-                Filter2DEngine_GPU::apply(morfBuf, dst);
+                Filter2DEngine_GPU::apply(morfBuf, dst, roi, stream);
            }
        }
 
@@ -435,7 +468,7 @@ Ptr<FilterEngine_GPU> cv::gpu::createMorphologyFilter_GPU(int op, int type, cons
 
 namespace
 {
-    void morphOp(int op, const GpuMat& src, GpuMat& dst, const Mat& _kernel, Point anchor, int iterations)
+    void morphOp(int op, const GpuMat& src, GpuMat& dst, const Mat& _kernel, Point anchor, int iterations, Stream& stream)
    {
        Mat kernel;
        Size ksize = _kernel.data ? _kernel.size() : Size(3, 3);
@@ -444,7 +477,10 @@ namespace
 
        if (iterations == 0 || _kernel.rows * _kernel.cols == 1)
        {
-            src.copyTo(dst);
+            if (stream)
+                stream.enqueueCopy(src, dst);
+            else
+                src.copyTo(dst);
            return;
        }
 
@@ -468,49 +504,49 @@ namespace
 
        Ptr<FilterEngine_GPU> f = createMorphologyFilter_GPU(op, src.type(), kernel, anchor, iterations);
 
-        f->apply(src, dst);
+        f->apply(src, dst, Rect(0,0,-1,-1), stream);
    }
 }
 
-void cv::gpu::erode( const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor, int iterations)
+void cv::gpu::erode( const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor, int iterations, Stream& stream)
 {
-    morphOp(MORPH_ERODE, src, dst, kernel, anchor, iterations);
+    morphOp(MORPH_ERODE, src, dst, kernel, anchor, iterations, stream);
 }
 
-void cv::gpu::dilate( const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor, int iterations)
+void cv::gpu::dilate( const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor, int iterations, Stream& stream)
 {
-    morphOp(MORPH_DILATE, src, dst, kernel, anchor, iterations);
+    morphOp(MORPH_DILATE, src, dst, kernel, anchor, iterations, stream);
 }
 
-void cv::gpu::morphologyEx( const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, Point anchor, int iterations)
+void cv::gpu::morphologyEx( const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, Point anchor, int iterations, Stream& stream)
 {
    GpuMat temp;
    switch( op )
    {
-    case MORPH_ERODE: erode( src, dst, kernel, anchor, iterations); break;
-    case MORPH_DILATE: dilate( src, dst, kernel, anchor, iterations); break;
+    case MORPH_ERODE: erode( src, dst, kernel, anchor, iterations, stream); break;
+    case MORPH_DILATE: dilate( src, dst, kernel, anchor, iterations, stream); break;
    case MORPH_OPEN:
-        erode( src, temp, kernel, anchor, iterations);
-        dilate( temp, dst, kernel, anchor, iterations);
+        erode( src, temp, kernel, anchor, iterations, stream);
+        dilate( temp, dst, kernel, anchor, iterations, stream);
        break;
    case CV_MOP_CLOSE:
-        dilate( src, temp, kernel, anchor, iterations);
-        erode( temp, dst, kernel, anchor, iterations);
+        dilate( src, temp, kernel, anchor, iterations, stream);
+        erode( temp, dst, kernel, anchor, iterations, stream);
        break;
    case CV_MOP_GRADIENT:
-        erode( src, temp, kernel, anchor, iterations);
-        dilate( src, dst, kernel, anchor, iterations);
-        subtract(dst, temp, dst);
+        erode( src, temp, kernel, anchor, iterations, stream);
+        dilate( src, dst, kernel, anchor, iterations, stream);
+        subtract(dst, temp, dst, stream);
        break;
    case CV_MOP_TOPHAT:
-        erode( src, dst, kernel, anchor, iterations);
-        dilate( dst, temp, kernel, anchor, iterations);
-        subtract(src, temp, dst);
+        erode( src, dst, kernel, anchor, iterations, stream);
+        dilate( dst, temp, kernel, anchor, iterations, stream);
+        subtract(src, temp, dst, stream);
        break;
    case CV_MOP_BLACKHAT:
-        dilate( src, dst, kernel, anchor, iterations);
-        erode( dst, temp, kernel, anchor, iterations);
-        subtract(temp, src, dst);
+        dilate( src, dst, kernel, anchor, iterations, stream);
+        erode( dst, temp, kernel, anchor, iterations, stream);
+        subtract(temp, src, dst, stream);
        break;
    default:
        CV_Error( CV_StsBadArg, "unknown morphological operation" );
@@ -531,7 +567,7 @@ namespace
        NPPLinearFilter(const Size& ksize_, const Point& anchor_, const GpuMat& kernel_, Npp32s nDivisor_, nppFilter2D_t func_) :
            BaseFilter_GPU(ksize_, anchor_), kernel(kernel_), nDivisor(nDivisor_), func(func_) {}
 
-        virtual void operator()(const GpuMat& src, GpuMat& dst)
+        virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
        {
            NppiSize sz;
            sz.width = src.cols;
@@ -543,10 +579,15 @@ namespace
            oAnchor.x = anchor.x;
            oAnchor.y = anchor.y;
 
+            cudaStream_t stream = StreamAccessor::getStream(s);
+
+            NppStreamHandler h(stream);
+
            nppSafeCall( func(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz,
                kernel.ptr<Npp32s>(), oKernelSize, oAnchor, nDivisor) );
 
-            cudaSafeCall( cudaThreadSynchronize() );
+            if (stream == 0)
+                cudaSafeCall( cudaDeviceSynchronize() );
        }
 
        GpuMat kernel;
@@ -578,7 +619,7 @@ Ptr<FilterEngine_GPU> cv::gpu::createLinearFilter_GPU(int srcType, int dstType,
    return createFilter2D_GPU(linearFilter, srcType, dstType);
 }
 
-void cv::gpu::filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernel, Point anchor)
+void cv::gpu::filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernel, Point anchor, Stream& stream)
 {
    if( ddepth < 0 )
        ddepth = src.depth();
@@ -586,7 +627,7 @@ void cv::gpu::filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& ke
    dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels()));
 
    Ptr<FilterEngine_GPU> f = createLinearFilter_GPU(src.type(), dst.type(), kernel, anchor);
-    f->apply(src, dst);
+    f->apply(src, dst, Rect(0, 0, -1, -1), stream);
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -595,10 +636,10 @@ void cv::gpu::filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& ke
 namespace cv { namespace gpu { namespace filters
 {
    template <typename T, typename D>
-    void linearRowFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);
+    void linearRowFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
 
    template <typename T, typename D>
-    void linearColumnFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);
+    void linearColumnFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
 }}}
 
 namespace
@@ -606,7 +647,7 @@ namespace
    typedef NppStatus (*nppFilter1D_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oROI,
        const Npp32s * pKernel, Npp32s nMaskSize, Npp32s nAnchor, Npp32s nDivisor);
 
-    typedef void (*gpuFilter1D_t)(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);
+    typedef void (*gpuFilter1D_t)(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
 
    class NppLinearRowFilter : public BaseRowFilter_GPU
    {
@@ -614,15 +655,20 @@ namespace
        NppLinearRowFilter(int ksize_, int anchor_, const GpuMat& kernel_, Npp32s nDivisor_, nppFilter1D_t func_) :
            BaseRowFilter_GPU(ksize_, anchor_), kernel(kernel_), nDivisor(nDivisor_), func(func_) {}
 
-        virtual void operator()(const GpuMat& src, GpuMat& dst)
+        virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
        {
            NppiSize sz;
            sz.width = src.cols;
            sz.height = src.rows;
 
+            cudaStream_t stream = StreamAccessor::getStream(s);
+
+            NppStreamHandler h(stream);
+
            nppSafeCall( func(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, kernel.ptr<Npp32s>(), ksize, anchor, nDivisor) );
 
-            cudaSafeCall( cudaThreadSynchronize() );
+            if (stream == 0)
+                cudaSafeCall( cudaDeviceSynchronize() );
        }
 
        GpuMat kernel;
@@ -636,9 +682,9 @@ namespace
        GpuLinearRowFilter(int ksize_, int anchor_, const Mat& kernel_, gpuFilter1D_t func_, int brd_type_) :
            BaseRowFilter_GPU(ksize_, anchor_), kernel(kernel_), func(func_), brd_type(brd_type_) {}
 
-        virtual void operator()(const GpuMat& src, GpuMat& dst)
+        virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
        {
-            func(src, dst, kernel.ptr<float>(), ksize, anchor, brd_type);
+            func(src, dst, kernel.ptr<float>(), ksize, anchor, brd_type, StreamAccessor::getStream(s));
        }
 
        Mat kernel;
@@ -720,15 +766,20 @@ namespace
        NppLinearColumnFilter(int ksize_, int anchor_, const GpuMat& kernel_, Npp32s nDivisor_, nppFilter1D_t func_) :
            BaseColumnFilter_GPU(ksize_, anchor_), kernel(kernel_), nDivisor(nDivisor_), func(func_) {}
 
-        virtual void operator()(const GpuMat& src, GpuMat& dst)
+        virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
        {
            NppiSize sz;
            sz.width = src.cols;
            sz.height = src.rows;
 
+            cudaStream_t stream = StreamAccessor::getStream(s);
+
+            NppStreamHandler h(stream);
+
            nppSafeCall( func(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, kernel.ptr<Npp32s>(), ksize, anchor, nDivisor) );
 
-            cudaSafeCall( cudaThreadSynchronize() );
+            if (stream == 0)
+                cudaSafeCall( cudaDeviceSynchronize() );
        }
 
        GpuMat kernel;
@@ -742,9 +793,9 @@ namespace
        GpuLinearColumnFilter(int ksize_, int anchor_, const Mat& kernel_, gpuFilter1D_t func_, int brd_type_) :
            BaseColumnFilter_GPU(ksize_, anchor_), kernel(kernel_), func(func_), brd_type(brd_type_) {}
 
-        virtual void operator()(const GpuMat& src, GpuMat& dst)
+        virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
        {
-            func(src, dst, kernel.ptr<float>(), ksize, anchor, brd_type);
+            func(src, dst, kernel.ptr<float>(), ksize, anchor, brd_type, StreamAccessor::getStream(s));
        }
 
        Mat kernel;
@@ -834,7 +885,8 @@ Ptr<FilterEngine_GPU> cv::gpu::createSeparableLinearFilter_GPU(int srcType, int
    return createSeparableFilter_GPU(rowFilter, columnFilter, srcType, bufType, dstType);
 }
 
-void cv::gpu::sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY, Point anchor, int rowBorderType, int columnBorderType)
+void cv::gpu::sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY, Point anchor, int rowBorderType, int columnBorderType,
+                          Stream& stream)
 {
    if( ddepth < 0 )
        ddepth = src.depth();
@@ -842,7 +894,7 @@ void cv::gpu::sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat&
    dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels()));
 
    Ptr<FilterEngine_GPU> f = createSeparableLinearFilter_GPU(src.type(), dst.type(), kernelX, kernelY, anchor, rowBorderType, columnBorderType);
-    f->apply(src, dst, Rect(0, 0, src.cols, src.rows));
+    f->apply(src, dst, Rect(0, 0, src.cols, src.rows), stream);
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -855,7 +907,7 @@ Ptr<FilterEngine_GPU> cv::gpu::createDerivFilter_GPU(int srcType, int dstType, i
    return createSeparableLinearFilter_GPU(srcType, dstType, kx, ky, Point(-1,-1), rowBorderType, columnBorderType);
 }
 
-void cv::gpu::Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize, double scale, int rowBorderType, int columnBorderType)
+void cv::gpu::Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize, double scale, int rowBorderType, int columnBorderType, Stream& stream)
 {
    Mat kx, ky;
    getDerivKernels(kx, ky, dx, dy, ksize, false, CV_32F);
@@ -870,10 +922,10 @@ void cv::gpu::Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy,
        ky *= scale;
    }
 
-    sepFilter2D(src, dst, ddepth, kx, ky, Point(-1,-1), rowBorderType, columnBorderType);
+    sepFilter2D(src, dst, ddepth, kx, ky, Point(-1,-1), rowBorderType, columnBorderType, stream);
 }
 
-void cv::gpu::Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, double scale, int rowBorderType, int columnBorderType)
+void cv::gpu::Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, double scale, int rowBorderType, int columnBorderType, Stream& stream)
 {
    Mat kx, ky;
    getDerivKernels(kx, ky, dx, dy, -1, false, CV_32F);
@@ -888,10 +940,10 @@ void cv::gpu::Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy,
        ky *= scale;
    }
 
-    sepFilter2D(src, dst, ddepth, kx, ky, Point(-1,-1), rowBorderType, columnBorderType);
+    sepFilter2D(src, dst, ddepth, kx, ky, Point(-1,-1), rowBorderType, columnBorderType, stream);
 }
 
-void cv::gpu::Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize, double scale)
+void cv::gpu::Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize, double scale, Stream& stream)
 {
    CV_Assert(ksize == 1 || ksize == 3);
 
@@ -904,7 +956,7 @@ void cv::gpu::Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize, d
    if (scale != 1)
        kernel *= scale;
 
-    filter2D(src, dst, ddepth, kernel, Point(-1,-1));
+    filter2D(src, dst, ddepth, kernel, Point(-1,-1), stream);
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -938,7 +990,7 @@ Ptr<FilterEngine_GPU> cv::gpu::createGaussianFilter_GPU(int type, Size ksize, do
    return createSeparableLinearFilter_GPU(type, type, kx, ky, Point(-1,-1), rowBorderType, columnBorderType);
 }
 
-void cv::gpu::GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double sigma1, double sigma2, int rowBorderType, int columnBorderType)
+void cv::gpu::GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double sigma1, double sigma2, int rowBorderType, int columnBorderType, Stream& stream)
 {
    if (ksize.width == 1 && ksize.height == 1)
    {
@@ -949,7 +1001,7 @@ void cv::gpu::GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double si
    dst.create(src.size(), src.type());
 
    Ptr<FilterEngine_GPU> f = createGaussianFilter_GPU(src.type(), ksize, sigma1, sigma2, rowBorderType, columnBorderType);
-    f->apply(src, dst, Rect(0, 0, src.cols, src.rows));
+    f->apply(src, dst, Rect(0, 0, src.cols, src.rows), stream);
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -965,7 +1017,7 @@ namespace
    public:
        NPPRankFilter(const Size& ksize_, const Point& anchor_, nppFilterRank_t func_) : BaseFilter_GPU(ksize_, anchor_), func(func_) {}
 
-        virtual void operator()(const GpuMat& src, GpuMat& dst)
+        virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
        {
            NppiSize sz;
            sz.width = src.cols;
@@ -977,9 +1029,14 @@ namespace
            oAnchor.x = anchor.x;
            oAnchor.y = anchor.y;
 
+            cudaStream_t stream = StreamAccessor::getStream(s);
+
+            NppStreamHandler h(stream);
+
            nppSafeCall( func(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, oKernelSize, oAnchor) );
 
-            cudaSafeCall( cudaThreadSynchronize() );
+            if (stream == 0)
+                cudaSafeCall( cudaDeviceSynchronize() );
        }
 
        nppFilterRank_t func;
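With erode, dilate and subtract all stream-aware (see the morphology hunks above), the compound operations enqueue their whole chain on the caller's stream. A usage sketch:

#include <opencv2/gpu/gpu.hpp>
#include <opencv2/imgproc/imgproc.hpp>

void morphGradient(const cv::gpu::GpuMat& src, cv::gpu::GpuMat& dst, cv::gpu::Stream& stream)
{
    cv::Mat kernel = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));

    // Internally erode + dilate + subtract, all forwarded to `stream`.
    cv::gpu::morphologyEx(src, dst, cv::MORPH_GRADIENT, kernel, cv::Point(-1, -1), 1, stream);
}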
@@ -44,11 +44,11 @@
 
 #if !defined (HAVE_CUDA)
 
-void cv::gpu::graphcut(GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&) { throw_nogpu(); }
+void cv::gpu::graphcut(GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
 
 #else /* !defined (HAVE_CUDA) */
 
-void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels, GpuMat& buf)
+void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels, GpuMat& buf, Stream& s)
 {
    Size src_size = terminals.size();
    CV_Assert(terminals.type() == CV_32S);
@@ -73,17 +73,17 @@ void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTrans
    if ((size_t)bufsz > buf.cols * buf.rows * buf.elemSize())
        buf.create(1, bufsz, CV_8U);
 
+    cudaStream_t stream = StreamAccessor::getStream(s);
+
+    NppStreamHandler h(stream);
+
    nppSafeCall( nppiGraphcut_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(), top.ptr<Npp32s>(), bottom.ptr<Npp32s>(),
        terminals.step, leftTransp.step, sznpp, labels.ptr<Npp8u>(), labels.step, buf.ptr<Npp8u>()) );
 
-    cudaSafeCall( cudaThreadSynchronize() );
+    if (stream == 0)
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 
 #endif /* !defined (HAVE_CUDA) */
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
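Note: caller-side, the new last parameter makes the graph cut enqueueable. A hypothetical use, assuming GpuMat variables terminals, leftTransp, rightTransp, top, bottom, labels and buf are prepared exactly as the CV_Asserts in this function require:

cv::gpu::Stream stream;
cv::gpu::graphcut(terminals, leftTransp, rightTransp, top, bottom, labels, buf, stream);
// more work can be enqueued on the same stream here
stream.waitForCompletion();   // labels is only valid after synchronization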
@@ -50,30 +50,24 @@ using namespace cv::gpu;
 void cv::gpu::remap(const GpuMat&, GpuMat&, const GpuMat&, const GpuMat&){ throw_nogpu(); }
 void cv::gpu::meanShiftFiltering(const GpuMat&, GpuMat&, int, int, TermCriteria) { throw_nogpu(); }
 void cv::gpu::meanShiftProc(const GpuMat&, GpuMat&, GpuMat&, int, int, TermCriteria) { throw_nogpu(); }
-void cv::gpu::drawColorDisp(const GpuMat&, GpuMat&, int) { throw_nogpu(); }
-void cv::gpu::drawColorDisp(const GpuMat&, GpuMat&, int, const Stream&) { throw_nogpu(); }
-void cv::gpu::reprojectImageTo3D(const GpuMat&, GpuMat&, const Mat&) { throw_nogpu(); }
-void cv::gpu::reprojectImageTo3D(const GpuMat&, GpuMat&, const Mat&, const Stream&) { throw_nogpu(); }
-void cv::gpu::resize(const GpuMat&, GpuMat&, Size, double, double, int) { throw_nogpu(); }
-void cv::gpu::copyMakeBorder(const GpuMat&, GpuMat&, int, int, int, int, const Scalar&) { throw_nogpu(); }
-void cv::gpu::warpAffine(const GpuMat&, GpuMat&, const Mat&, Size, int) { throw_nogpu(); }
-void cv::gpu::warpPerspective(const GpuMat&, GpuMat&, const Mat&, Size, int) { throw_nogpu(); }
-void cv::gpu::rotate(const GpuMat&, GpuMat&, Size, double, double, double, int) { throw_nogpu(); }
-void cv::gpu::integral(const GpuMat&, GpuMat&) { throw_nogpu(); }
-void cv::gpu::integralBuffered(const GpuMat&, GpuMat&, GpuMat&) { throw_nogpu(); }
-void cv::gpu::integral(const GpuMat&, GpuMat&, GpuMat&) { throw_nogpu(); }
-void cv::gpu::sqrIntegral(const GpuMat&, GpuMat&) { throw_nogpu(); }
+void cv::gpu::drawColorDisp(const GpuMat&, GpuMat&, int, Stream&) { throw_nogpu(); }
+void cv::gpu::reprojectImageTo3D(const GpuMat&, GpuMat&, const Mat&, Stream&) { throw_nogpu(); }
+void cv::gpu::resize(const GpuMat&, GpuMat&, Size, double, double, int, Stream&) { throw_nogpu(); }
+void cv::gpu::copyMakeBorder(const GpuMat&, GpuMat&, int, int, int, int, const Scalar&, Stream&) { throw_nogpu(); }
+void cv::gpu::warpAffine(const GpuMat&, GpuMat&, const Mat&, Size, int, Stream&) { throw_nogpu(); }
+void cv::gpu::warpPerspective(const GpuMat&, GpuMat&, const Mat&, Size, int, Stream&) { throw_nogpu(); }
+void cv::gpu::rotate(const GpuMat&, GpuMat&, Size, double, double, double, int, Stream&) { throw_nogpu(); }
+void cv::gpu::integral(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
+void cv::gpu::integralBuffered(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
+void cv::gpu::integral(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
+void cv::gpu::sqrIntegral(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
 void cv::gpu::columnSum(const GpuMat&, GpuMat&) { throw_nogpu(); }
-void cv::gpu::rectStdDev(const GpuMat&, const GpuMat&, GpuMat&, const Rect&) { throw_nogpu(); }
-//void cv::gpu::Canny(const GpuMat&, GpuMat&, double, double, int) { throw_nogpu(); }
-//void cv::gpu::Canny(const GpuMat&, GpuMat&, GpuMat&, double, double, int) { throw_nogpu(); }
-//void cv::gpu::Canny(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, double, double, int) { throw_nogpu(); }
-//void cv::gpu::Canny(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, double, double, int) { throw_nogpu(); }
+void cv::gpu::rectStdDev(const GpuMat&, const GpuMat&, GpuMat&, const Rect&, Stream&) { throw_nogpu(); }
 void cv::gpu::evenLevels(GpuMat&, int, int, int) { throw_nogpu(); }
-void cv::gpu::histEven(const GpuMat&, GpuMat&, int, int, int) { throw_nogpu(); }
-void cv::gpu::histEven(const GpuMat&, GpuMat*, int*, int*, int*) { throw_nogpu(); }
-void cv::gpu::histRange(const GpuMat&, GpuMat&, const GpuMat&) { throw_nogpu(); }
-void cv::gpu::histRange(const GpuMat&, GpuMat*, const GpuMat*) { throw_nogpu(); }
+void cv::gpu::histEven(const GpuMat&, GpuMat&, int, int, int, Stream&) { throw_nogpu(); }
+void cv::gpu::histEven(const GpuMat&, GpuMat*, int*, int*, int*, Stream&) { throw_nogpu(); }
+void cv::gpu::histRange(const GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_nogpu(); }
+void cv::gpu::histRange(const GpuMat&, GpuMat*, const GpuMat*, Stream&) { throw_nogpu(); }
 void cv::gpu::cornerHarris(const GpuMat&, GpuMat&, int, int, double, int) { throw_nogpu(); }
 void cv::gpu::cornerMinEigenVal(const GpuMat&, GpuMat&, int, int, int) { throw_nogpu(); }
 void cv::gpu::mulSpectrums(const GpuMat&, const GpuMat&, GpuMat&, int, bool) { throw_nogpu(); }
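Note: every pair of synchronous/asynchronous stubs collapses into a single Stream& overload. Existing blocking callers are presumably kept source-compatible by a default argument in the headers (not part of this excerpt), mirroring the Stream& s = Stream::Null() default visible in the filter hunk above:

cv::gpu::GpuMat disp, colored;
cv::gpu::drawColorDisp(disp, colored, 64);       // default Stream::Null(): blocking, as before
cv::gpu::Stream s;
cv::gpu::drawColorDisp(disp, colored, 64, s);    // enqueued on s, returns immediately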
@@ -203,14 +197,7 @@ namespace
     const drawColorDisp_caller_t drawColorDisp_callers[] = {drawColorDisp_caller<unsigned char>, 0, 0, drawColorDisp_caller<short>, 0, 0, 0, 0};
 }

-void cv::gpu::drawColorDisp(const GpuMat& src, GpuMat& dst, int ndisp)
-{
-    CV_Assert(src.type() == CV_8U || src.type() == CV_16S);
-
-    drawColorDisp_callers[src.type()](src, dst, ndisp, 0);
-}
-
-void cv::gpu::drawColorDisp(const GpuMat& src, GpuMat& dst, int ndisp, const Stream& stream)
+void cv::gpu::drawColorDisp(const GpuMat& src, GpuMat& dst, int ndisp, Stream& stream)
 {
     CV_Assert(src.type() == CV_8U || src.type() == CV_16S);

@@ -234,14 +221,7 @@ namespace
     const reprojectImageTo3D_caller_t reprojectImageTo3D_callers[] = {reprojectImageTo3D_caller<unsigned char>, 0, 0, reprojectImageTo3D_caller<short>, 0, 0, 0, 0};
 }

-void cv::gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q)
-{
-    CV_Assert((disp.type() == CV_8U || disp.type() == CV_16S) && Q.type() == CV_32F && Q.rows == 4 && Q.cols == 4);
-
-    reprojectImageTo3D_callers[disp.type()](disp, xyzw, Q, 0);
-}
-
-void cv::gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, const Stream& stream)
+void cv::gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, Stream& stream)
 {
     CV_Assert((disp.type() == CV_8U || disp.type() == CV_16S) && Q.type() == CV_32F && Q.rows == 4 && Q.cols == 4);

@@ -251,7 +231,7 @@ void cv::gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q,
 ////////////////////////////////////////////////////////////////////////
 // resize

-void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation)
+void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& s)
 {
     static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR/*, NPPI_INTER_CUBIC, 0, NPPI_INTER_LANCZOS*/};

@@ -284,6 +264,10 @@ void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, doub
     dstsz.width = dst.cols;
     dstsz.height = dst.rows;

+    cudaStream_t stream = StreamAccessor::getStream(s);
+
+    NppStreamHandler h(stream);
+
     if (src.type() == CV_8UC1)
     {
         nppSafeCall( nppiResize_8u_C1R(src.ptr<Npp8u>(), srcsz, src.step, srcrect,
@@ -295,13 +279,14 @@ void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, doub
             dst.ptr<Npp8u>(), dst.step, dstsz, fx, fy, npp_inter[interpolation]) );
     }

-    cudaSafeCall( cudaThreadSynchronize() );
+    if (stream == 0)
+        cudaSafeCall( cudaDeviceSynchronize() );
 }

 ////////////////////////////////////////////////////////////////////////
 // copyMakeBorder

-void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, const Scalar& value)
+void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, const Scalar& value, Stream& s)
 {
     CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4 || src.type() == CV_32SC1 || src.type() == CV_32FC1);

@@ -314,6 +299,10 @@ void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom
     dstsz.width = dst.cols;
     dstsz.height = dst.rows;

+    cudaStream_t stream = StreamAccessor::getStream(s);
+
+    NppStreamHandler h(stream);
+
     switch (src.type())
     {
     case CV_8UC1:
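Note: resize and copyMakeBorder now share the same prologue/epilogue as the filters above. The recurring shape of the change, condensed into one hypothetical wrapper (the commented call stands in for any nppi* primitive):

void nppCallPattern(cv::gpu::Stream& s)
{
    cudaStream_t stream = cv::gpu::StreamAccessor::getStream(s);
    NppStreamHandler h(stream);            // all NPP calls in this scope run on 'stream'

    // nppSafeCall( nppiSomething(...) );  // the wrapped primitive goes here

    if (stream == 0)                       // default stream: preserve blocking semantics
        cudaSafeCall( cudaDeviceSynchronize() );  // CUDA 4.0 replacement for cudaThreadSynchronize()
}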
@@ -349,7 +338,8 @@ void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom
         CV_Assert(!"Unsupported source type");
     }

-    cudaSafeCall( cudaThreadSynchronize() );
+    if (stream == 0)
+        cudaSafeCall( cudaDeviceSynchronize() );
 }

 ////////////////////////////////////////////////////////////////////////
@@ -372,7 +362,7 @@ namespace

     void nppWarpCaller(const GpuMat& src, GpuMat& dst, double coeffs[][3], const Size& dsize, int flags,
                        npp_warp_8u_t npp_warp_8u[][2], npp_warp_16u_t npp_warp_16u[][2],
-                       npp_warp_32s_t npp_warp_32s[][2], npp_warp_32f_t npp_warp_32f[][2])
+                       npp_warp_32s_t npp_warp_32s[][2], npp_warp_32f_t npp_warp_32f[][2], cudaStream_t stream)
     {
         static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC};

@@ -397,6 +387,8 @@ namespace

         int warpInd = (flags & WARP_INVERSE_MAP) >> 4;

+        NppStreamHandler h(stream);
+
         switch (src.depth())
         {
         case CV_8U:
@@ -419,11 +411,12 @@ namespace
             CV_Assert(!"Unsupported source type");
         }

-        cudaSafeCall( cudaThreadSynchronize() );
+        if (stream == 0)
+            cudaSafeCall( cudaDeviceSynchronize() );
     }
 }

-void cv::gpu::warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags)
+void cv::gpu::warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags, Stream& s)
 {
     static npp_warp_8u_t npp_warpAffine_8u[][2] =
     {
@@ -464,10 +457,10 @@ void cv::gpu::warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsiz
     Mat coeffsMat(2, 3, CV_64F, (void*)coeffs);
     M.convertTo(coeffsMat, coeffsMat.type());

-    nppWarpCaller(src, dst, coeffs, dsize, flags, npp_warpAffine_8u, npp_warpAffine_16u, npp_warpAffine_32s, npp_warpAffine_32f);
+    nppWarpCaller(src, dst, coeffs, dsize, flags, npp_warpAffine_8u, npp_warpAffine_16u, npp_warpAffine_32s, npp_warpAffine_32f, StreamAccessor::getStream(s));
 }

-void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags)
+void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags, Stream& s)
 {
     static npp_warp_8u_t npp_warpPerspective_8u[][2] =
     {
@@ -508,13 +501,13 @@ void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size
     Mat coeffsMat(3, 3, CV_64F, (void*)coeffs);
     M.convertTo(coeffsMat, coeffsMat.type());

-    nppWarpCaller(src, dst, coeffs, dsize, flags, npp_warpPerspective_8u, npp_warpPerspective_16u, npp_warpPerspective_32s, npp_warpPerspective_32f);
+    nppWarpCaller(src, dst, coeffs, dsize, flags, npp_warpPerspective_8u, npp_warpPerspective_16u, npp_warpPerspective_32s, npp_warpPerspective_32f, StreamAccessor::getStream(s));
 }

 ////////////////////////////////////////////////////////////////////////
 // rotate

-void cv::gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift, double yShift, int interpolation)
+void cv::gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift, double yShift, int interpolation, Stream& s)
 {
     static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC};

@@ -535,6 +528,10 @@ void cv::gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, d
     dstroi.height = dst.rows;
     dstroi.width = dst.cols;

+    cudaStream_t stream = StreamAccessor::getStream(s);
+
+    NppStreamHandler h(stream);
+
     if (src.type() == CV_8UC1)
     {
         nppSafeCall( nppiRotate_8u_C1R(src.ptr<Npp8u>(), srcsz, src.step, srcroi,
@@ -546,19 +543,20 @@ void cv::gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, d
             dst.ptr<Npp8u>(), dst.step, dstroi, angle, xShift, yShift, npp_inter[interpolation]) );
     }

-    cudaSafeCall( cudaThreadSynchronize() );
+    if (stream == 0)
+        cudaSafeCall( cudaDeviceSynchronize() );
 }

 ////////////////////////////////////////////////////////////////////////
 // integral

-void cv::gpu::integral(const GpuMat& src, GpuMat& sum)
+void cv::gpu::integral(const GpuMat& src, GpuMat& sum, Stream& s)
 {
     GpuMat buffer;
-    integralBuffered(src, sum, buffer);
+    integralBuffered(src, sum, buffer, s);
 }

-void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer)
+void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, Stream& s)
 {
     CV_Assert(src.type() == CV_8UC1);

@@ -575,35 +573,45 @@ void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer)
     nppSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );
     ensureSizeIsEnough(1, bufSize, CV_8UC1, buffer);

+    cudaStream_t stream = StreamAccessor::getStream(s);
+
+    NppStStreamHandler h(stream);
+
     nppSafeCall( nppiStIntegral_8u32u_C1R(const_cast<Ncv8u*>(src.ptr<Ncv8u>()), src.step,
         sum.ptr<Ncv32u>(), sum.step, roiSize, buffer.ptr<Ncv8u>(), bufSize, prop) );

-    cudaSafeCall( cudaThreadSynchronize() );
+    if (stream == 0)
+        cudaSafeCall( cudaDeviceSynchronize() );
 }

-void cv::gpu::integral(const GpuMat& src, GpuMat& sum, GpuMat& sqsum)
+void cv::gpu::integral(const GpuMat& src, GpuMat& sum, GpuMat& sqsum, Stream& s)
 {
     CV_Assert(src.type() == CV_8UC1);

-    int w = src.cols + 1, h = src.rows + 1;
+    int width = src.cols + 1, height = src.rows + 1;

-    sum.create(h, w, CV_32S);
-    sqsum.create(h, w, CV_32F);
+    sum.create(height, width, CV_32S);
+    sqsum.create(height, width, CV_32F);

     NppiSize sz;
     sz.width = src.cols;
     sz.height = src.rows;

-    nppSafeCall( nppiSqrIntegral_8u32s32f_C1R(const_cast<Npp8u*>(src.ptr<Npp8u>()), src.step, sum.ptr<Npp32s>(),
-        sum.step, sqsum.ptr<Npp32f>(), sqsum.step, sz, 0, 0.0f, h) );
-
-    cudaSafeCall( cudaThreadSynchronize() );
+    cudaStream_t stream = StreamAccessor::getStream(s);
+
+    NppStreamHandler h(stream);
+
+    nppSafeCall( nppiSqrIntegral_8u32s32f_C1R(const_cast<Npp8u*>(src.ptr<Npp8u>()), src.step, sum.ptr<Npp32s>(),
+        sum.step, sqsum.ptr<Npp32f>(), sqsum.step, sz, 0, 0.0f, height) );
+
+    if (stream == 0)
+        cudaSafeCall( cudaDeviceSynchronize() );
 }

 //////////////////////////////////////////////////////////////////////////////
 // sqrIntegral

-void cv::gpu::sqrIntegral(const GpuMat& src, GpuMat& sqsum)
+void cv::gpu::sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& s)
 {
     CV_Assert(src.type() == CV_8U);

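Note: a hypothetical caller that benefits from the buffered variant, reusing one scratch allocation across iterations:

cv::gpu::GpuMat frame(480, 640, CV_8UC1);   // assumed refreshed elsewhere each iteration
cv::gpu::GpuMat sum, buffer;
cv::gpu::Stream stream;
for (int i = 0; i < 100; ++i)
    cv::gpu::integralBuffered(frame, sum, buffer, stream);  // buffer allocated once, then reused
stream.waitForCompletion();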
@@ -618,11 +626,16 @@ void cv::gpu::sqrIntegral(const GpuMat& src, GpuMat& sqsum)
     nppSafeCall(nppiStSqrIntegralGetSize_8u64u(roiSize, &bufSize, prop));
     GpuMat buf(1, bufSize, CV_8U);

+    cudaStream_t stream = StreamAccessor::getStream(s);
+
+    NppStStreamHandler h(stream);
+
     sqsum.create(src.rows + 1, src.cols + 1, CV_64F);
     nppSafeCall(nppiStSqrIntegral_8u64u_C1R(const_cast<Ncv8u*>(src.ptr<Ncv8u>(0)), src.step,
         sqsum.ptr<Ncv64u>(0), sqsum.step, roiSize, buf.ptr<Ncv8u>(0), bufSize, prop));

-    cudaSafeCall( cudaThreadSynchronize() );
+    if (stream == 0)
+        cudaSafeCall( cudaDeviceSynchronize() );
 }

 //////////////////////////////////////////////////////////////////////////////
@@ -641,7 +654,7 @@ void cv::gpu::columnSum(const GpuMat& src, GpuMat& dst)
     imgproc::columnSum_32F(src, dst);
 }

-void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect)
+void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& s)
 {
     CV_Assert(src.type() == CV_32SC1 && sqr.type() == CV_32FC1);

@@ -657,69 +670,17 @@ void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, cons
     nppRect.x = rect.x;
     nppRect.y = rect.y;

+    cudaStream_t stream = StreamAccessor::getStream(s);
+
+    NppStreamHandler h(stream);
+
     nppSafeCall( nppiRectStdDev_32s32f_C1R(src.ptr<Npp32s>(), src.step, sqr.ptr<Npp32f>(), sqr.step,
         dst.ptr<Npp32f>(), dst.step, sz, nppRect) );

-    cudaSafeCall( cudaThreadSynchronize() );
+    if (stream == 0)
+        cudaSafeCall( cudaDeviceSynchronize() );
 }

-////////////////////////////////////////////////////////////////////////
-// Canny
-
-//void cv::gpu::Canny(const GpuMat& image, GpuMat& edges, double threshold1, double threshold2, int apertureSize)
-//{
-//    CV_Assert(!"disabled until fix crash");
-//
-//    GpuMat srcDx, srcDy;
-//
-//    Sobel(image, srcDx, CV_32F, 1, 0, apertureSize);
-//    Sobel(image, srcDy, CV_32F, 0, 1, apertureSize);
-//
-//    GpuMat buf;
-//
-//    Canny(srcDx, srcDy, edges, buf, threshold1, threshold2, apertureSize);
-//}
-//
-//void cv::gpu::Canny(const GpuMat& image, GpuMat& edges, GpuMat& buf, double threshold1, double threshold2, int apertureSize)
-//{
-//    CV_Assert(!"disabled until fix crash");
-//
-//    GpuMat srcDx, srcDy;
-//
-//    Sobel(image, srcDx, CV_32F, 1, 0, apertureSize);
-//    Sobel(image, srcDy, CV_32F, 0, 1, apertureSize);
-//
-//    Canny(srcDx, srcDy, edges, buf, threshold1, threshold2, apertureSize);
-//}
-//
-//void cv::gpu::Canny(const GpuMat& srcDx, const GpuMat& srcDy, GpuMat& edges, double threshold1, double threshold2, int apertureSize)
-//{
-//    CV_Assert(!"disabled until fix crash");
-//
-//    GpuMat buf;
-//    Canny(srcDx, srcDy, edges, buf, threshold1, threshold2, apertureSize);
-//}
-//
-//void cv::gpu::Canny(const GpuMat& srcDx, const GpuMat& srcDy, GpuMat& edges, GpuMat& buf, double threshold1, double threshold2, int apertureSize)
-//{
-//    CV_Assert(!"disabled until fix crash");
-//    CV_Assert(srcDx.type() == CV_32FC1 && srcDy.type() == CV_32FC1 && srcDx.size() == srcDy.size());
-//
-//    edges.create(srcDx.size(), CV_8UC1);
-//
-//    NppiSize sz;
-//    sz.height = srcDx.rows;
-//    sz.width = srcDx.cols;
-//
-//    int bufsz;
-//    nppSafeCall( nppiCannyGetBufferSize(sz, &bufsz) );
-//    ensureSizeIsEnough(1, bufsz, CV_8UC1, buf);
-//
-//    nppSafeCall( nppiCanny_32f8u_C1R(srcDx.ptr<Npp32f>(), srcDx.step, srcDy.ptr<Npp32f>(), srcDy.step,
-//        edges.ptr<Npp8u>(), edges.step, sz, (Npp32f)threshold1, (Npp32f)threshold2, buf.ptr<Npp8u>()) );
-//
-//    cudaSafeCall( cudaThreadSynchronize() );
-//}
-
 ////////////////////////////////////////////////////////////////////////
 // Histogram
@@ -755,7 +716,7 @@ namespace
     {
         typedef typename NppHistogramEvenFuncC1<SDEPTH>::src_t src_t;

-        static void hist(const GpuMat& src, GpuMat& hist, int histSize, int lowerLevel, int upperLevel)
+        static void hist(const GpuMat& src, GpuMat& hist, int histSize, int lowerLevel, int upperLevel, cudaStream_t stream)
         {
             int levels = histSize + 1;
             hist.create(1, histSize, CV_32S);
@@ -769,10 +730,14 @@ namespace

             get_buf_size(sz, levels, &buf_size);
             buffer.create(1, buf_size, CV_8U);

+            NppStreamHandler h(stream);
+
             nppSafeCall( func(src.ptr<src_t>(), src.step, sz, hist.ptr<Npp32s>(), levels,
                 lowerLevel, upperLevel, buffer.ptr<Npp8u>()) );

-            cudaSafeCall( cudaThreadSynchronize() );
+            if (stream == 0)
+                cudaSafeCall( cudaDeviceSynchronize() );
         }
     };
     template<int SDEPTH, typename NppHistogramEvenFuncC4<SDEPTH>::func_ptr func, get_buf_size_c4_t get_buf_size>
@@ -780,7 +745,7 @@ namespace
     {
         typedef typename NppHistogramEvenFuncC4<SDEPTH>::src_t src_t;

-        static void hist(const GpuMat& src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4])
+        static void hist(const GpuMat& src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], cudaStream_t stream)
         {
             int levels[] = {histSize[0] + 1, histSize[1] + 1, histSize[2] + 1, histSize[3] + 1};
             hist[0].create(1, histSize[0], CV_32S);
@@ -799,9 +764,13 @@ namespace

             get_buf_size(sz, levels, &buf_size);
             buffer.create(1, buf_size, CV_8U);

+            NppStreamHandler h(stream);
+
             nppSafeCall( func(src.ptr<src_t>(), src.step, sz, pHist, levels, lowerLevel, upperLevel, buffer.ptr<Npp8u>()) );

-            cudaSafeCall( cudaThreadSynchronize() );
+            if (stream == 0)
+                cudaSafeCall( cudaDeviceSynchronize() );
         }
     };

@@ -849,7 +818,7 @@ namespace
         typedef typename NppHistogramRangeFuncC1<SDEPTH>::level_t level_t;
         enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1<SDEPTH>::LEVEL_TYPE_CODE};

-        static void hist(const GpuMat& src, GpuMat& hist, const GpuMat& levels)
+        static void hist(const GpuMat& src, GpuMat& hist, const GpuMat& levels, cudaStream_t stream)
         {
             CV_Assert(levels.type() == LEVEL_TYPE_CODE && levels.rows == 1);

@@ -864,9 +833,13 @@ namespace

             get_buf_size(sz, levels.cols, &buf_size);
             buffer.create(1, buf_size, CV_8U);

+            NppStreamHandler h(stream);
+
             nppSafeCall( func(src.ptr<src_t>(), src.step, sz, hist.ptr<Npp32s>(), levels.ptr<level_t>(), levels.cols, buffer.ptr<Npp8u>()) );

-            cudaSafeCall( cudaThreadSynchronize() );
+            if (stream == 0)
+                cudaSafeCall( cudaDeviceSynchronize() );
         }
     };
     template<int SDEPTH, typename NppHistogramRangeFuncC4<SDEPTH>::func_ptr func, get_buf_size_c4_t get_buf_size>
@@ -876,7 +849,7 @@ namespace
         typedef typename NppHistogramRangeFuncC1<SDEPTH>::level_t level_t;
         enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1<SDEPTH>::LEVEL_TYPE_CODE};

-        static void hist(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4])
+        static void hist(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], cudaStream_t stream)
         {
             CV_Assert(levels[0].type() == LEVEL_TYPE_CODE && levels[0].rows == 1);
             CV_Assert(levels[1].type() == LEVEL_TYPE_CODE && levels[1].rows == 1);
@@ -901,9 +874,13 @@ namespace

             get_buf_size(sz, nLevels, &buf_size);
             buffer.create(1, buf_size, CV_8U);

+            NppStreamHandler h(stream);
+
             nppSafeCall( func(src.ptr<src_t>(), src.step, sz, pHist, pLevels, nLevels, buffer.ptr<Npp8u>()) );

-            cudaSafeCall( cudaThreadSynchronize() );
+            if (stream == 0)
+                cudaSafeCall( cudaDeviceSynchronize() );
         }
     };
 }
@@ -915,11 +892,11 @@ void cv::gpu::evenLevels(GpuMat& levels, int nLevels, int lowerLevel, int upperL
     levels.upload(host_levels);
 }

-void cv::gpu::histEven(const GpuMat& src, GpuMat& hist, int histSize, int lowerLevel, int upperLevel)
+void cv::gpu::histEven(const GpuMat& src, GpuMat& hist, int histSize, int lowerLevel, int upperLevel, Stream& stream)
 {
     CV_Assert(src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 );

-    typedef void (*hist_t)(const GpuMat& src, GpuMat& hist, int levels, int lowerLevel, int upperLevel);
+    typedef void (*hist_t)(const GpuMat& src, GpuMat& hist, int levels, int lowerLevel, int upperLevel, cudaStream_t stream);
     static const hist_t hist_callers[] =
     {
         NppHistogramEvenC1<CV_8U , nppiHistogramEven_8u_C1R , nppiHistogramEvenGetBufferSize_8u_C1R >::hist,
@@ -928,14 +905,14 @@ void cv::gpu::histEven(const GpuMat& src, GpuMat& hist, int histSize, int lowerL
         NppHistogramEvenC1<CV_16S, nppiHistogramEven_16s_C1R, nppiHistogramEvenGetBufferSize_16s_C1R>::hist
     };

-    hist_callers[src.depth()](src, hist, histSize, lowerLevel, upperLevel);
+    hist_callers[src.depth()](src, hist, histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream));
 }

-void cv::gpu::histEven(const GpuMat& src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4])
+void cv::gpu::histEven(const GpuMat& src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream)
 {
     CV_Assert(src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 );

-    typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], int levels[4], int lowerLevel[4], int upperLevel[4]);
+    typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], int levels[4], int lowerLevel[4], int upperLevel[4], cudaStream_t stream);
     static const hist_t hist_callers[] =
     {
         NppHistogramEvenC4<CV_8U , nppiHistogramEven_8u_C4R , nppiHistogramEvenGetBufferSize_8u_C4R >::hist,
@@ -944,14 +921,14 @@ void cv::gpu::histEven(const GpuMat& src, GpuMat hist[4], int histSize[4], int l
         NppHistogramEvenC4<CV_16S, nppiHistogramEven_16s_C4R, nppiHistogramEvenGetBufferSize_16s_C4R>::hist
     };

-    hist_callers[src.depth()](src, hist, histSize, lowerLevel, upperLevel);
+    hist_callers[src.depth()](src, hist, histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream));
 }

-void cv::gpu::histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels)
+void cv::gpu::histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, Stream& stream)
 {
     CV_Assert(src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 || src.type() == CV_32FC1);

-    typedef void (*hist_t)(const GpuMat& src, GpuMat& hist, const GpuMat& levels);
+    typedef void (*hist_t)(const GpuMat& src, GpuMat& hist, const GpuMat& levels, cudaStream_t stream);
     static const hist_t hist_callers[] =
     {
         NppHistogramRangeC1<CV_8U , nppiHistogramRange_8u_C1R , nppiHistogramRangeGetBufferSize_8u_C1R >::hist,
@@ -962,14 +939,14 @@ void cv::gpu::histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels)
         NppHistogramRangeC1<CV_32F, nppiHistogramRange_32f_C1R, nppiHistogramRangeGetBufferSize_32f_C1R>::hist
     };

-    hist_callers[src.depth()](src, hist, levels);
+    hist_callers[src.depth()](src, hist, levels, StreamAccessor::getStream(stream));
 }

-void cv::gpu::histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4])
+void cv::gpu::histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], Stream& stream)
 {
     CV_Assert(src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 || src.type() == CV_32FC4);

-    typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4]);
+    typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], cudaStream_t stream);
     static const hist_t hist_callers[] =
     {
         NppHistogramRangeC4<CV_8U , nppiHistogramRange_8u_C4R , nppiHistogramRangeGetBufferSize_8u_C4R >::hist,
@@ -980,7 +957,7 @@ void cv::gpu::histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4
         NppHistogramRangeC4<CV_32F, nppiHistogramRange_32f_C4R, nppiHistogramRangeGetBufferSize_32f_C4R>::hist
     };

-    hist_callers[src.depth()](src, hist, levels);
+    hist_callers[src.depth()](src, hist, levels, StreamAccessor::getStream(stream));
 }

 ////////////////////////////////////////////////////////////////////////
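Note: the public histogram entry points now just forward the raw stream to the templated callers above. A hypothetical asynchronous use:

cv::gpu::GpuMat src(480, 640, CV_8UC1);     // assumed already filled on the device
cv::gpu::GpuMat hist;
cv::gpu::Stream stream;
cv::gpu::histEven(src, hist, 256, 0, 256, stream);  // 256 even bins over [0, 256)
stream.waitForCompletion();                         // hist is valid only after this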
@@ -128,7 +128,7 @@ void cv::gpu::GpuMat::copyTo( GpuMat& m ) const
     CV_DbgAssert(!this->empty());
     m.create(size(), type());
     cudaSafeCall( cudaMemcpy2D(m.data, m.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToDevice) );
-    cudaSafeCall( cudaThreadSynchronize() );
+    cudaSafeCall( cudaDeviceSynchronize() );
 }

 void cv::gpu::GpuMat::copyTo( GpuMat& mat, const GpuMat& mask ) const
@@ -179,7 +179,7 @@ namespace
             sz.height = src.rows;
             nppSafeCall( func(src.ptr<src_t>(), src.step, dst.ptr<dst_t>(), dst.step, sz) );

-            cudaSafeCall( cudaThreadSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
         }
     };
     template<int DDEPTH, typename NppConvertFunc<CV_32F, DDEPTH>::func_ptr func> struct NppCvt<CV_32F, DDEPTH, func>
@@ -193,7 +193,7 @@ namespace
             sz.height = src.rows;
             nppSafeCall( func(src.ptr<Npp32f>(), src.step, dst.ptr<dst_t>(), dst.step, sz, NPP_RND_NEAR) );

-            cudaSafeCall( cudaThreadSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
         }
     };

@@ -349,7 +349,7 @@ namespace
             Scalar_<src_t> nppS = s;
             nppSafeCall( func(nppS.val, src.ptr<src_t>(), src.step, sz) );

-            cudaSafeCall( cudaThreadSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
         }
     };
     template<int SDEPTH, typename NppSetFunc<SDEPTH, 1>::func_ptr func> struct NppSet<SDEPTH, 1, func>
@@ -364,7 +364,7 @@ namespace
             Scalar_<src_t> nppS = s;
             nppSafeCall( func(nppS[0], src.ptr<src_t>(), src.step, sz) );

-            cudaSafeCall( cudaThreadSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
         }
     };

@@ -400,7 +400,7 @@ namespace
             Scalar_<src_t> nppS = s;
             nppSafeCall( func(nppS.val, src.ptr<src_t>(), src.step, sz, mask.ptr<Npp8u>(), mask.step) );

-            cudaSafeCall( cudaThreadSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
         }
     };
     template<int SDEPTH, typename NppSetMaskFunc<SDEPTH, 1>::func_ptr func> struct NppSetMask<SDEPTH, 1, func>
@@ -415,7 +415,7 @@ namespace
             Scalar_<src_t> nppS = s;
             nppSafeCall( func(nppS[0], src.ptr<src_t>(), src.step, sz, mask.ptr<Npp8u>(), mask.step) );

-            cudaSafeCall( cudaThreadSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
         }
     };

@@ -463,8 +463,8 @@ GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask)
     {
         {NppSet<CV_8U, 1, nppiSet_8u_C1R>::set,kernelSet<uchar>,kernelSet<uchar>,NppSet<CV_8U, 4, nppiSet_8u_C4R>::set},
         {kernelSet<schar>,kernelSet<schar>,kernelSet<schar>,kernelSet<schar>},
-        {NppSet<CV_16U, 1, nppiSet_16u_C1R>::set,kernelSet<ushort>,kernelSet<ushort>,NppSet<CV_16U, 4, nppiSet_16u_C4R>::set},
-        {NppSet<CV_16S, 1, nppiSet_16s_C1R>::set,kernelSet<short>,kernelSet<short>,NppSet<CV_16S, 4, nppiSet_16s_C4R>::set},
+        {NppSet<CV_16U, 1, nppiSet_16u_C1R>::set,NppSet<CV_16U, 2, nppiSet_16u_C2R>::set,kernelSet<ushort>,NppSet<CV_16U, 4, nppiSet_16u_C4R>::set},
+        {NppSet<CV_16S, 1, nppiSet_16s_C1R>::set,NppSet<CV_16S, 2, nppiSet_16s_C2R>::set,kernelSet<short>,NppSet<CV_16S, 4, nppiSet_16s_C4R>::set},
         {NppSet<CV_32S, 1, nppiSet_32s_C1R>::set,kernelSet<int>,kernelSet<int>,NppSet<CV_32S, 4, nppiSet_32s_C4R>::set},
         {NppSet<CV_32F, 1, nppiSet_32f_C1R>::set,kernelSet<float>,kernelSet<float>,NppSet<CV_32F, 4, nppiSet_32f_C4R>::set},
         {kernelSet<double>,kernelSet<double>,kernelSet<double>,kernelSet<double>},
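Note: with the widened dispatch table, setTo on two-channel 16-bit matrices now reaches the NPP C2R primitives referenced above instead of the generic templated kernel:

cv::gpu::GpuMat m(480, 640, CV_16UC2);
m.setTo(cv::Scalar(1, 2));   // routed to nppiSet_16u_C2R by the updated table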
@@ -114,24 +114,14 @@ void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev)
     sz.width = src.cols;
     sz.height = src.rows;

-#if NPP_VERSION_MAJOR >= 4
-
     DeviceBuffer dbuf(2);

     nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr<Npp8u>(), src.step, sz, dbuf, (double*)dbuf + 1) );

-    cudaSafeCall( cudaThreadSynchronize() );
+    cudaSafeCall( cudaDeviceSynchronize() );

     double* ptrs[2] = {mean.val, stddev.val};
     dbuf.download(ptrs);
-
-#else
-
-    nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr<Npp8u>(), src.step, sz, mean.val, stddev.val) );
-
-    cudaSafeCall( cudaThreadSynchronize() );
-
-#endif
 }


@@ -185,24 +175,14 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)

     double retVal;

-#if NPP_VERSION_MAJOR >= 4
-
     DeviceBuffer dbuf;

     nppSafeCall( npp_norm_diff_func[funcIdx](src1.ptr<Npp8u>(), src1.step, src2.ptr<Npp8u>(), src2.step, sz, dbuf) );

-    cudaSafeCall( cudaThreadSynchronize() );
+    cudaSafeCall( cudaDeviceSynchronize() );

     dbuf.download(&retVal);
-
-#else
-
-    nppSafeCall( npp_norm_diff_func[funcIdx](src1.ptr<Npp8u>(), src1.step, src2.ptr<Npp8u>(), src2.step, sz, &retVal) );
-
-    cudaSafeCall( cudaThreadSynchronize() );
-
-#endif

     return retVal;
 }
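Note: dropping the pre-4.0 branch means the result always round-trips through device memory (NPP writes into DeviceBuffer, which is downloaded afterwards). Nothing changes for callers:

cv::gpu::GpuMat img(480, 640, CV_8UC1);
cv::Scalar mean, stddev;
cv::gpu::meanStdDev(img, mean, stddev);   // blocking; includes the device-to-host download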
@@ -332,7 +332,7 @@ namespace cv
             cudaSafeCall( cudaGetLastError() );

             if (stream == 0)
-                cudaSafeCall( cudaThreadSynchronize() );
+                cudaSafeCall( cudaDeviceSynchronize() );
         }

         template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
@@ -349,7 +349,7 @@ namespace cv
             cudaSafeCall( cudaGetLastError() );

             if (stream == 0)
-                cudaSafeCall( cudaThreadSynchronize() );
+                cudaSafeCall( cudaDeviceSynchronize() );
         }
     };
     template<> struct TransformDispatcher<true>
@@ -370,7 +370,7 @@ namespace cv
             cudaSafeCall( cudaGetLastError() );

             if (stream == 0)
-                cudaSafeCall( cudaThreadSynchronize() );
+                cudaSafeCall( cudaDeviceSynchronize() );
         }

         template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
@@ -389,7 +389,7 @@ namespace cv
             cudaSafeCall( cudaGetLastError() );

             if (stream == 0)
-                cudaSafeCall( cudaThreadSynchronize() );
+                cudaSafeCall( cudaDeviceSynchronize() );
         }
     };

@@ -77,8 +77,8 @@
 #include "nvidia/NPP_staging/NPP_staging.hpp"
 #include "nvidia/NCVHaarObjectDetection.hpp"

-#define CUDART_MINIMUM_REQUIRED_VERSION 3020
-#define NPP_MINIMUM_REQUIRED_VERSION 3216
+#define CUDART_MINIMUM_REQUIRED_VERSION 4000
+#define NPP_MINIMUM_REQUIRED_VERSION 4000

 #if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION)
 #error "Insufficient Cuda Runtime library version, please update it."
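Note: CUDART_VERSION encodes major*1000 + minor*10, so the minimum moves from CUDA 3.2 (3020) to CUDA 4.0 (4000), and any build against an older toolkit now stops at the #error above. A quick way to see what a binary was compiled against, assuming that encoding:

#include <cstdio>
#include <cuda_runtime.h>

int main()
{
    std::printf("built against CUDA %d.%d\n",
                CUDART_VERSION / 1000, (CUDART_VERSION % 100) / 10);
    return 0;
}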
@@ -46,14 +46,10 @@ using namespace std;

 #if !defined (HAVE_CUDA)

-void cv::gpu::merge(const GpuMat* /*src*/, size_t /*count*/, GpuMat& /*dst*/) { throw_nogpu(); }
-void cv::gpu::merge(const vector<GpuMat>& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); }
-void cv::gpu::merge(const GpuMat* /*src*/, size_t /*count*/, GpuMat& /*dst*/, const Stream& /*stream*/) { throw_nogpu(); }
-void cv::gpu::merge(const vector<GpuMat>& /*src*/, GpuMat& /*dst*/, const Stream& /*stream*/) { throw_nogpu(); }
-void cv::gpu::split(const GpuMat& /*src*/, GpuMat* /*dst*/) { throw_nogpu(); }
-void cv::gpu::split(const GpuMat& /*src*/, vector<GpuMat>& /*dst*/) { throw_nogpu(); }
-void cv::gpu::split(const GpuMat& /*src*/, GpuMat* /*dst*/, const Stream& /*stream*/) { throw_nogpu(); }
-void cv::gpu::split(const GpuMat& /*src*/, vector<GpuMat>& /*dst*/, const Stream& /*stream*/) { throw_nogpu(); }
+void cv::gpu::merge(const GpuMat* /*src*/, size_t /*count*/, GpuMat& /*dst*/, Stream& /*stream*/) { throw_nogpu(); }
+void cv::gpu::merge(const vector<GpuMat>& /*src*/, GpuMat& /*dst*/, Stream& /*stream*/) { throw_nogpu(); }
+void cv::gpu::split(const GpuMat& /*src*/, GpuMat* /*dst*/, Stream& /*stream*/) { throw_nogpu(); }
+void cv::gpu::split(const GpuMat& /*src*/, vector<GpuMat>& /*dst*/, Stream& /*stream*/) { throw_nogpu(); }

 #else /* !defined (HAVE_CUDA) */

@@ -148,51 +144,25 @@ namespace cv { namespace gpu { namespace split_merge
 }}}


-void cv::gpu::merge(const GpuMat* src, size_t n, GpuMat& dst)
-{
-    split_merge::merge(src, n, dst, 0);
-}
-
-
-void cv::gpu::merge(const vector<GpuMat>& src, GpuMat& dst)
-{
-    split_merge::merge(&src[0], src.size(), dst, 0);
-}
-
-
-void cv::gpu::merge(const GpuMat* src, size_t n, GpuMat& dst, const Stream& stream)
+void cv::gpu::merge(const GpuMat* src, size_t n, GpuMat& dst, Stream& stream)
 {
     split_merge::merge(src, n, dst, StreamAccessor::getStream(stream));
 }


-void cv::gpu::merge(const vector<GpuMat>& src, GpuMat& dst, const Stream& stream)
+void cv::gpu::merge(const vector<GpuMat>& src, GpuMat& dst, Stream& stream)
 {
     split_merge::merge(&src[0], src.size(), dst, StreamAccessor::getStream(stream));
 }


-void cv::gpu::split(const GpuMat& src, GpuMat* dst)
-{
-    split_merge::split(src, dst, 0);
-}
-
-
-void cv::gpu::split(const GpuMat& src, vector<GpuMat>& dst)
-{
-    dst.resize(src.channels());
-    if(src.channels() > 0)
-        split_merge::split(src, &dst[0], 0);
-}
-
-
-void cv::gpu::split(const GpuMat& src, GpuMat* dst, const Stream& stream)
+void cv::gpu::split(const GpuMat& src, GpuMat* dst, Stream& stream)
 {
     split_merge::split(src, dst, StreamAccessor::getStream(stream));
 }


-void cv::gpu::split(const GpuMat& src, vector<GpuMat>& dst, const Stream& stream)
+void cv::gpu::split(const GpuMat& src, vector<GpuMat>& dst, Stream& stream)
 {
     dst.resize(src.channels());
     if(src.channels() > 0)
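Note: all four split/merge entry points now take Stream& directly. A hypothetical round trip on one stream:

cv::gpu::GpuMat src(480, 640, CV_8UC3), dst;   // any multi-channel input, assumed filled
std::vector<cv::gpu::GpuMat> channels;
cv::gpu::Stream stream;
cv::gpu::split(src, channels, stream);   // one single-channel GpuMat per channel
cv::gpu::merge(channels, dst, stream);   // reassemble on the same stream
stream.waitForCompletion();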
@@ -51,8 +51,7 @@ cv::gpu::StereoBM_GPU::StereoBM_GPU() { throw_nogpu(); }
 cv::gpu::StereoBM_GPU::StereoBM_GPU(int, int, int) { throw_nogpu(); }

 bool cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable() { throw_nogpu(); return false; }
-void cv::gpu::StereoBM_GPU::operator() ( const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
-void cv::gpu::StereoBM_GPU::operator() ( const GpuMat&, const GpuMat&, GpuMat&, const Stream&) { throw_nogpu(); }
+void cv::gpu::StereoBM_GPU::operator() ( const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }

 #else /* !defined (HAVE_CUDA) */

@@ -124,13 +123,7 @@ static void stereo_bm_gpu_operator ( GpuMat& minSSD, GpuMat& leBuf, GpuMat& ri
     bm::postfilter_textureness(le_for_bm, winSize, avergeTexThreshold, disparity, stream);
 }

-void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity)
-{
-    ::stereo_bm_gpu_operator(minSSD, leBuf, riBuf, preset, ndisp, winSize, avergeTexThreshold, left, right, disparity, 0);
-}
-
-void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const Stream& stream)
+void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream)
 {
     ::stereo_bm_gpu_operator(minSSD, leBuf, riBuf, preset, ndisp, winSize, avergeTexThreshold, left, right, disparity, StreamAccessor::getStream(stream));
 }
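Note: the block matcher loses its synchronous duplicate as well. A hypothetical pipeline with the remaining overload:

cv::gpu::StereoBM_GPU bm;
cv::gpu::GpuMat left(480, 640, CV_8UC1), right(480, 640, CV_8UC1), disparity;
cv::gpu::Stream stream;
bm(left, right, disparity, stream);   // asynchronous; the default stream keeps blocking behaviour
stream.waitForCompletion();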
@@ -53,10 +53,8 @@ void cv::gpu::StereoBeliefPropagation::estimateRecommendedParams(int, int, int&,
 cv::gpu::StereoBeliefPropagation::StereoBeliefPropagation(int, int, int, int) { throw_nogpu(); }
 cv::gpu::StereoBeliefPropagation::StereoBeliefPropagation(int, int, int, float, float, float, float, int) { throw_nogpu(); }
 
-void cv::gpu::StereoBeliefPropagation::operator()(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
 void cv::gpu::StereoBeliefPropagation::operator()(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
 
-void cv::gpu::StereoBeliefPropagation::operator()(const GpuMat&, GpuMat&) { throw_nogpu(); }
 void cv::gpu::StereoBeliefPropagation::operator()(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
 
 #else /* !defined (HAVE_CUDA) */
@@ -133,7 +131,7 @@ namespace
         CV_Assert(rthis.msg_type == CV_32F || (1 << (rthis.levels - 1)) * scale * rthis.max_data_term < numeric_limits<short>::max());
     }
 
-    void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disp, cudaStream_t stream)
+    void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disp, Stream& stream)
     {
         typedef void (*comp_data_t)(const DevMem2D& left, const DevMem2D& right, const DevMem2D& data, cudaStream_t stream);
         static const comp_data_t comp_data_callers[2][5] =
@@ -154,16 +152,16 @@ namespace
         const int min_image_dim_size = 2;
         CV_Assert(min(lowest_cols, lowest_rows) > min_image_dim_size);
 
-        init();
+        init(stream);
 
         datas[0].create(rows * rthis.ndisp, cols, rthis.msg_type);
 
-        comp_data_callers[rthis.msg_type == CV_32F][left.channels()](left, right, datas[0], stream);
+        comp_data_callers[rthis.msg_type == CV_32F][left.channels()](left, right, datas[0], StreamAccessor::getStream(stream));
 
         calcBP(disp, stream);
     }
 
-    void operator()(const GpuMat& data, GpuMat& disp, cudaStream_t stream)
+    void operator()(const GpuMat& data, GpuMat& disp, Stream& stream)
     {
         CV_Assert((data.type() == rthis.msg_type) && (data.rows % rthis.ndisp == 0));
 
@@ -176,14 +174,14 @@ namespace
         const int min_image_dim_size = 2;
         CV_Assert(min(lowest_cols, lowest_rows) > min_image_dim_size);
 
-        init();
+        init(stream);
 
         datas[0] = data;
 
         calcBP(disp, stream);
     }
 private:
-    void init()
+    void init(Stream& stream)
     {
         u.create(rows * rthis.ndisp, cols, rthis.msg_type);
         d.create(rows * rthis.ndisp, cols, rthis.msg_type);
@@ -193,10 +191,20 @@ namespace
         if (rthis.levels & 1)
         {
             //can clear less area
-            u = zero;
-            d = zero;
-            l = zero;
-            r = zero;
+            if (stream)
+            {
+                stream.enqueueMemSet(u, zero);
+                stream.enqueueMemSet(d, zero);
+                stream.enqueueMemSet(l, zero);
+                stream.enqueueMemSet(r, zero);
+            }
+            else
+            {
+                u.setTo(zero);
+                d.setTo(zero);
+                l.setTo(zero);
+                r.setTo(zero);
+            }
         }
 
         if (rthis.levels > 1)
@@ -211,10 +219,20 @@ namespace
 
         if ((rthis.levels & 1) == 0)
         {
-            u2 = zero;
-            d2 = zero;
-            l2 = zero;
-            r2 = zero;
+            if (stream)
+            {
+                stream.enqueueMemSet(u2, zero);
+                stream.enqueueMemSet(d2, zero);
+                stream.enqueueMemSet(l2, zero);
+                stream.enqueueMemSet(r2, zero);
+            }
+            else
+            {
+                u2.setTo(zero);
+                d2.setTo(zero);
+                l2.setTo(zero);
+                r2.setTo(zero);
+            }
         }
     }
 
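The buffer-clearing idiom in the two hunks above recurs throughout this commit: when a real stream is supplied, memsets are enqueued on it; otherwise the old blocking setTo path preserves the previous synchronous behavior. A minimal sketch of the pattern as a standalone helper (clearMat is a hypothetical name, not part of this commit):

    // Hypothetical helper; relies on the Stream boolean test used above,
    // which distinguishes a real stream from the null/default stream.
    static void clearMat(cv::gpu::GpuMat& m, const cv::Scalar& zero, cv::gpu::Stream& stream)
    {
        if (stream)
            stream.enqueueMemSet(m, zero); // asynchronous, ordered on the stream
        else
            m.setTo(zero);                 // immediate, blocking clear
    }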
@@ -229,7 +247,7 @@ namespace
         rows_all[0] = rows;
     }
 
-    void calcBP(GpuMat& disp, cudaStream_t stream)
+    void calcBP(GpuMat& disp, Stream& stream)
     {
         using namespace cv::gpu::bp;
 
@@ -259,6 +277,8 @@ namespace
 
         const int funcIdx = rthis.msg_type == CV_32F;
 
+        cudaStream_t cudaStream = StreamAccessor::getStream(stream);
+
         for (int i = 1; i < rthis.levels; ++i)
         {
             cols_all[i] = (cols_all[i-1] + 1) / 2;
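Note the hoist in the hunk above: calcBP now takes the high-level Stream& and unwraps it once into a raw cudaStream_t, which the low-level kernel callers below keep accepting unchanged. A sketch of the division of labor (doSomething and kernel_caller are hypothetical names used only for illustration):

    void doSomething(GpuMat& buf, Stream& stream)
    {
        // Unwrap once: yields 0 (the default stream) for the null stream.
        cudaStream_t cudaStream = StreamAccessor::getStream(stream);

        // The high-level Stream object serves the enqueue* convenience calls...
        if (stream)
            stream.enqueueMemSet(buf, Scalar::all(0));
        else
            buf.setTo(Scalar::all(0));

        // ...while raw launchers keep their cudaStream_t signature:
        // kernel_caller(buf, cudaStream);
    }

This keeps the Stream/StreamAccessor dependency at the dispatch layer and leaves the .cu launch code untouched.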
@@ -266,7 +286,7 @@ namespace
 
             datas[i].create(rows_all[i] * rthis.ndisp, cols_all[i], rthis.msg_type);
 
-            data_step_down_callers[funcIdx](cols_all[i], rows_all[i], rows_all[i-1], datas[i-1], datas[i], stream);
+            data_step_down_callers[funcIdx](cols_all[i], rows_all[i], rows_all[i-1], datas[i-1], datas[i], cudaStream);
         }
 
         DevMem2D mus[] = {u, u2};
@@ -280,9 +300,9 @@ namespace
         {
             // for lower level we have already computed messages by setting to zero
             if (i != rthis.levels - 1)
-                level_up_messages_callers[funcIdx](mem_idx, cols_all[i], rows_all[i], rows_all[i+1], mus, mds, mls, mrs, stream);
+                level_up_messages_callers[funcIdx](mem_idx, cols_all[i], rows_all[i], rows_all[i+1], mus, mds, mls, mrs, cudaStream);
 
-            calc_all_iterations_callers[funcIdx](cols_all[i], rows_all[i], rthis.iters, mus[mem_idx], mds[mem_idx], mls[mem_idx], mrs[mem_idx], datas[i], stream);
+            calc_all_iterations_callers[funcIdx](cols_all[i], rows_all[i], rthis.iters, mus[mem_idx], mds[mem_idx], mls[mem_idx], mrs[mem_idx], datas[i], cudaStream);
 
             mem_idx = (mem_idx + 1) & 1;
         }
@@ -291,12 +311,21 @@ namespace
         disp.create(rows, cols, CV_16S);
 
         out = ((disp.type() == CV_16S) ? disp : (out.create(rows, cols, CV_16S), out));
-        out = zero;
 
-        output_callers[funcIdx](u, d, l, r, datas.front(), out, stream);
+        if (stream)
+            stream.enqueueMemSet(out, zero);
+        else
+            out.setTo(zero);
+
+        output_callers[funcIdx](u, d, l, r, datas.front(), out, cudaStream);
 
         if (disp.type() != CV_16S)
-            out.convertTo(disp, disp.type());
+        {
+            if (stream)
+                stream.enqueueConvert(out, disp, disp.type());
+            else
+                out.convertTo(disp, disp.type());
+        }
     }
 
     StereoBeliefPropagation& rthis;
@@ -323,28 +352,16 @@ namespace
     };
 }
 
-void cv::gpu::StereoBeliefPropagation::operator()(const GpuMat& left, const GpuMat& right, GpuMat& disp)
-{
-    ::StereoBeliefPropagationImpl impl(*this, u, d, l, r, u2, d2, l2, r2, datas, out);
-    impl(left, right, disp, 0);
-}
-
 void cv::gpu::StereoBeliefPropagation::operator()(const GpuMat& left, const GpuMat& right, GpuMat& disp, Stream& stream)
 {
     ::StereoBeliefPropagationImpl impl(*this, u, d, l, r, u2, d2, l2, r2, datas, out);
-    impl(left, right, disp, StreamAccessor::getStream(stream));
+    impl(left, right, disp, stream);
 }
 
-void cv::gpu::StereoBeliefPropagation::operator()(const GpuMat& data, GpuMat& disp)
-{
-    ::StereoBeliefPropagationImpl impl(*this, u, d, l, r, u2, d2, l2, r2, datas, out);
-    impl(data, disp, 0);
-}
-
 void cv::gpu::StereoBeliefPropagation::operator()(const GpuMat& data, GpuMat& disp, Stream& stream)
 {
     ::StereoBeliefPropagationImpl impl(*this, u, d, l, r, u2, d2, l2, r2, datas, out);
-    impl(data, disp, StreamAccessor::getStream(stream));
+    impl(data, disp, stream);
 }
 
 #endif /* !defined (HAVE_CUDA) */
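From the caller's perspective, the surviving Stream overloads cover both modes. A hedged usage sketch (variable names are hypothetical, and the GpuMats are assumed to already hold a rectified stereo pair):

    cv::gpu::GpuMat d_left, d_right, d_disp;
    // ... upload the rectified stereo pair into d_left / d_right ...
    cv::gpu::Stream stream;
    cv::gpu::StereoBeliefPropagation bp;

    bp(d_left, d_right, d_disp, stream); // work is enqueued; the call returns immediately
    stream.waitForCompletion();          // block until the disparity map is ready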
@@ -53,7 +53,6 @@ void cv::gpu::StereoConstantSpaceBP::estimateRecommendedParams(int, int, int&, i
 cv::gpu::StereoConstantSpaceBP::StereoConstantSpaceBP(int, int, int, int, int) { throw_nogpu(); }
 cv::gpu::StereoConstantSpaceBP::StereoConstantSpaceBP(int, int, int, int, float, float, float, float, int, int) { throw_nogpu(); }
 
-void cv::gpu::StereoConstantSpaceBP::operator()(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
 void cv::gpu::StereoConstantSpaceBP::operator()(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
 
 #else /* !defined (HAVE_CUDA) */
@@ -136,7 +135,7 @@ cv::gpu::StereoConstantSpaceBP::StereoConstantSpaceBP(int ndisp_, int iters_, in
 template<class T>
 static void csbp_operator(StereoConstantSpaceBP& rthis, GpuMat u[2], GpuMat d[2], GpuMat l[2], GpuMat r[2],
                           GpuMat disp_selected_pyr[2], GpuMat& data_cost, GpuMat& data_cost_selected,
-                          GpuMat& temp, GpuMat& out, const GpuMat& left, const GpuMat& right, GpuMat& disp, cudaStream_t stream)
+                          GpuMat& temp, GpuMat& out, const GpuMat& left, const GpuMat& right, GpuMat& disp, Stream& stream)
 {
     CV_DbgAssert(0 < rthis.ndisp && 0 < rthis.iters && 0 < rthis.levels && 0 < rthis.nr_plane
                  && left.rows == right.rows && left.cols == right.cols && left.type() == right.type());
@@ -145,6 +144,8 @@ static void csbp_operator(StereoConstantSpaceBP& rthis, GpuMat u[2], GpuMat d[2]
 
     const Scalar zero = Scalar::all(0);
 
+    cudaStream_t cudaStream = StreamAccessor::getStream(stream);
+
     ////////////////////////////////////////////////////////////////////////////////////////////
     // Init
 
@@ -210,18 +211,36 @@ static void csbp_operator(StereoConstantSpaceBP& rthis, GpuMat u[2], GpuMat d[2]
     csbp::load_constants(rthis.ndisp, rthis.max_data_term, rthis.data_weight,
                          rthis.max_disc_term, rthis.disc_single_jump, rthis.min_disp_th, left, right, temp);
 
-    l[0] = zero;
-    d[0] = zero;
-    r[0] = zero;
-    u[0] = zero;
+    if (stream)
+    {
+        stream.enqueueMemSet(l[0], zero);
+        stream.enqueueMemSet(d[0], zero);
+        stream.enqueueMemSet(r[0], zero);
+        stream.enqueueMemSet(u[0], zero);
 
-    l[1] = zero;
-    d[1] = zero;
-    r[1] = zero;
-    u[1] = zero;
+        stream.enqueueMemSet(l[1], zero);
+        stream.enqueueMemSet(d[1], zero);
+        stream.enqueueMemSet(r[1], zero);
+        stream.enqueueMemSet(u[1], zero);
 
-    data_cost = zero;
-    data_cost_selected = zero;
+        stream.enqueueMemSet(data_cost, zero);
+        stream.enqueueMemSet(data_cost_selected, zero);
+    }
+    else
+    {
+        l[0].setTo(zero);
+        d[0].setTo(zero);
+        r[0].setTo(zero);
+        u[0].setTo(zero);
+
+        l[1].setTo(zero);
+        d[1].setTo(zero);
+        r[1].setTo(zero);
+        u[1].setTo(zero);
+
+        data_cost.setTo(zero);
+        data_cost_selected.setTo(zero);
+    }
 
     int cur_idx = 0;
 
@@ -230,12 +249,12 @@ static void csbp_operator(StereoConstantSpaceBP& rthis, GpuMat u[2], GpuMat d[2]
         if (i == levels - 1)
         {
             csbp::init_data_cost(left.rows, left.cols, disp_selected_pyr[cur_idx].ptr<T>(), data_cost_selected.ptr<T>(),
-                                 step_pyr[i], rows_pyr[i], cols_pyr[i], i, nr_plane_pyr[i], rthis.ndisp, left.channels(), rthis.use_local_init_data_cost, stream);
+                                 step_pyr[i], rows_pyr[i], cols_pyr[i], i, nr_plane_pyr[i], rthis.ndisp, left.channels(), rthis.use_local_init_data_cost, cudaStream);
         }
         else
        {
             csbp::compute_data_cost(disp_selected_pyr[cur_idx].ptr<T>(), data_cost.ptr<T>(), step_pyr[i], step_pyr[i+1],
-                                    left.rows, left.cols, rows_pyr[i], cols_pyr[i], rows_pyr[i+1], i, nr_plane_pyr[i+1], left.channels(), stream);
+                                    left.rows, left.cols, rows_pyr[i], cols_pyr[i], rows_pyr[i+1], i, nr_plane_pyr[i+1], left.channels(), cudaStream);
 
             int new_idx = (cur_idx + 1) & 1;
 
@@ -243,46 +262,49 @@ static void csbp_operator(StereoConstantSpaceBP& rthis, GpuMat u[2], GpuMat d[2]
                                u[cur_idx].ptr<T>(), d[cur_idx].ptr<T>(), l[cur_idx].ptr<T>(), r[cur_idx].ptr<T>(),
                                disp_selected_pyr[new_idx].ptr<T>(), disp_selected_pyr[cur_idx].ptr<T>(),
                                data_cost_selected.ptr<T>(), data_cost.ptr<T>(), step_pyr[i], step_pyr[i+1], rows_pyr[i],
-                               cols_pyr[i], nr_plane_pyr[i], rows_pyr[i+1], cols_pyr[i+1], nr_plane_pyr[i+1], stream);
+                               cols_pyr[i], nr_plane_pyr[i], rows_pyr[i+1], cols_pyr[i+1], nr_plane_pyr[i+1], cudaStream);
 
             cur_idx = new_idx;
         }
 
         csbp::calc_all_iterations(u[cur_idx].ptr<T>(), d[cur_idx].ptr<T>(), l[cur_idx].ptr<T>(), r[cur_idx].ptr<T>(),
                                   data_cost_selected.ptr<T>(), disp_selected_pyr[cur_idx].ptr<T>(), step_pyr[i],
-                                  rows_pyr[i], cols_pyr[i], nr_plane_pyr[i], rthis.iters, stream);
+                                  rows_pyr[i], cols_pyr[i], nr_plane_pyr[i], rthis.iters, cudaStream);
     }
 
     if (disp.empty())
         disp.create(rows, cols, CV_16S);
 
     out = ((disp.type() == CV_16S) ? disp : (out.create(rows, cols, CV_16S), out));
-    out = zero;
+
+    if (stream)
+        stream.enqueueMemSet(out, zero);
+    else
+        out.setTo(zero);
 
     csbp::compute_disp(u[cur_idx].ptr<T>(), d[cur_idx].ptr<T>(), l[cur_idx].ptr<T>(), r[cur_idx].ptr<T>(),
-                       data_cost_selected.ptr<T>(), disp_selected_pyr[cur_idx].ptr<T>(), step_pyr[0], out, nr_plane_pyr[0], stream);
+                       data_cost_selected.ptr<T>(), disp_selected_pyr[cur_idx].ptr<T>(), step_pyr[0], out, nr_plane_pyr[0], cudaStream);
 
     if (disp.type() != CV_16S)
-        out.convertTo(disp, disp.type());
+    {
+        if (stream)
+            stream.enqueueConvert(out, disp, disp.type());
+        else
+            out.convertTo(disp, disp.type());
+    }
 }
 
 
 typedef void (*csbp_operator_t)(StereoConstantSpaceBP& rthis, GpuMat u[2], GpuMat d[2], GpuMat l[2], GpuMat r[2],
                                 GpuMat disp_selected_pyr[2], GpuMat& data_cost, GpuMat& data_cost_selected,
-                                GpuMat& temp, GpuMat& out, const GpuMat& left, const GpuMat& right, GpuMat& disp, cudaStream_t stream);
+                                GpuMat& temp, GpuMat& out, const GpuMat& left, const GpuMat& right, GpuMat& disp, Stream& stream);
 
 const static csbp_operator_t operators[] = {0, 0, 0, csbp_operator<short>, 0, csbp_operator<float>, 0, 0};
 
-void cv::gpu::StereoConstantSpaceBP::operator()(const GpuMat& left, const GpuMat& right, GpuMat& disp)
-{
-    CV_Assert(msg_type == CV_32F || msg_type == CV_16S);
-    operators[msg_type](*this, u, d, l, r, disp_selected_pyr, data_cost, data_cost_selected, temp, out, left, right, disp, 0);
-}
-
 void cv::gpu::StereoConstantSpaceBP::operator()(const GpuMat& left, const GpuMat& right, GpuMat& disp, Stream& stream)
 {
     CV_Assert(msg_type == CV_32F || msg_type == CV_16S);
-    operators[msg_type](*this, u, d, l, r, disp_selected_pyr, data_cost, data_cost_selected, temp, out, left, right, disp, StreamAccessor::getStream(stream));
+    operators[msg_type](*this, u, d, l, r, disp_selected_pyr, data_cost, data_cost_selected, temp, out, left, right, disp, stream);
 }
 
 #endif /* !defined (HAVE_CUDA) */
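Taken together, the three files now thread a single Stream through StereoBM_GPU, StereoBeliefPropagation, and StereoConstantSpaceBP. A sketch of an end-to-end asynchronous pipeline on the updated API (variable names are hypothetical; for the copies to actually overlap with computation, the host buffers would need to be page-locked, e.g. backed by CudaMem):

    cv::Mat h_left, h_right, h_disp;   // host images; page-locked in practice
    cv::gpu::GpuMat d_left, d_right, d_disp;
    cv::gpu::Stream stream;

    stream.enqueueUpload(h_left, d_left);   // async host-to-device copies
    stream.enqueueUpload(h_right, d_right);

    cv::gpu::StereoConstantSpaceBP csbp;
    csbp(d_left, d_right, d_disp, stream);  // kernels enqueued on the same stream

    stream.enqueueDownload(d_disp, h_disp); // async device-to-host copy
    stream.waitForCompletion();             // single sync point for the whole pipeline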