merged the trunk r8669:8702
parent 150aeee58f, commit 6cca6a4548

Changed paths: 3rdparty/tbb; cmake; doc/tutorials/introduction/android_binary_package; modules (contrib/src, core, gpu: perf, src, src/cuda kernels, ml/doc); samples.

3rdparty/tbb/CMakeLists.txt
@@ -114,6 +114,7 @@ add_definitions(-D__TBB_DYNAMIC_LOAD_ENABLED=0 #required
if(tbb_need_GENERIC_DWORD_LOAD_STORE)
#needed by TBB 4.0 update 1,2; fixed in TBB 4.0 update 3 but it has 2 new problems
add_definitions(-D__TBB_USE_GENERIC_DWORD_LOAD_STORE=1)
set(tbb_need_GENERIC_DWORD_LOAD_STORE ON PARENT_SCOPE)
endif()

add_library(tbb STATIC ${lib_srcs} ${lib_hdrs} "${CMAKE_CURRENT_SOURCE_DIR}/android_additional.h" "${CMAKE_CURRENT_SOURCE_DIR}/${tbb_version_file}")
cmake/OpenCVConfig.cmake (new file, 155 lines)
@@ -0,0 +1,155 @@
# ===================================================================================
#  The OpenCV CMake configuration file
#
#             ** File generated automatically, do not modify **
#
#  Usage from an external project:
#    In your CMakeLists.txt, add these lines:
#
#    FIND_PACKAGE(OpenCV REQUIRED)
#    TARGET_LINK_LIBRARIES(MY_TARGET_NAME ${OpenCV_LIBS})
#
#    Or you can search for specific OpenCV modules:
#
#    FIND_PACKAGE(OpenCV REQUIRED core highgui)
#
#    If the module is found then OPENCV_<MODULE>_FOUND is set to TRUE.
#
#    This file will define the following variables:
#      - OpenCV_LIBS                     : The list of libraries to link against.
#      - OpenCV_LIB_DIR                  : The directory(es) where lib files are. Calling LINK_DIRECTORIES
#                                          with this path is NOT needed.
#      - OpenCV_INCLUDE_DIRS             : The OpenCV include directories.
#      - OpenCV_COMPUTE_CAPABILITIES     : The version of compute capability
#      - OpenCV_ANDROID_NATIVE_API_LEVEL : Minimum required level of Android API
#      - OpenCV_VERSION                  : The version of this OpenCV build. Example: "2.4.0"
#      - OpenCV_VERSION_MAJOR            : Major version part of OpenCV_VERSION. Example: "2"
#      - OpenCV_VERSION_MINOR            : Minor version part of OpenCV_VERSION. Example: "4"
#      - OpenCV_VERSION_PATCH            : Patch version part of OpenCV_VERSION. Example: "0"
#
#  Advanced variables:
#      - OpenCV_SHARED
#      - OpenCV_CONFIG_PATH
#      - OpenCV_INSTALL_PATH (not set on Windows)
#      - OpenCV_LIB_COMPONENTS
#      - OpenCV_USE_MANGLED_PATHS
#      - OpenCV_HAVE_ANDROID_CAMERA
#
# ===================================================================================
#
#    Windows pack specific options:
#      - OpenCV_STATIC
#      - OpenCV_CUDA

if(NOT DEFINED OpenCV_STATIC)
  # look for global setting
  if(NOT DEFINED BUILD_SHARED_LIBS OR BUILD_SHARED_LIBS)
    set(OpenCV_STATIC OFF)
  else()
    set(OpenCV_STATIC ON)
  endif()
endif()

if(NOT DEFINED OpenCV_CUDA)
  # if the user's app uses CUDA, then it probably wants CUDA-enabled OpenCV binaries
  if(CUDA_FOUND)
    set(OpenCV_CUDA ON)
  endif()
endif()

if(MSVC)
  if(CMAKE_CL_64)
    set(OpenCV_ARCH x64)
    set(OpenCV_TBB_ARCH intel64)
  else()
    set(OpenCV_ARCH x86)
    set(OpenCV_TBB_ARCH ia32)
  endif()
  if(MSVC_VERSION EQUAL 1400)
    set(OpenCV_RUNTIME vc8)
  elseif(MSVC_VERSION EQUAL 1500)
    set(OpenCV_RUNTIME vc9)
  elseif(MSVC_VERSION EQUAL 1600)
    set(OpenCV_RUNTIME vc10)
  elseif(MSVC_VERSION EQUAL 1700)
    set(OpenCV_RUNTIME vc11)
  endif()
elseif(MINGW)
  set(OpenCV_RUNTIME mingw)

  execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpmachine
                  OUTPUT_VARIABLE OPENCV_GCC_TARGET_MACHINE
                  OUTPUT_STRIP_TRAILING_WHITESPACE)
  if(OPENCV_GCC_TARGET_MACHINE MATCHES "64")
    set(MINGW64 1)
    set(OpenCV_ARCH x64)
  else()
    set(OpenCV_ARCH x86)
  endif()
endif()

if(CMAKE_VERSION VERSION_GREATER 2.6.2)
  unset(OpenCV_CONFIG_PATH CACHE)
endif()

get_filename_component(OpenCV_CONFIG_PATH "${CMAKE_CURRENT_LIST_FILE}" PATH CACHE)
if(OpenCV_RUNTIME AND OpenCV_ARCH)
  if(OpenCV_STATIC AND EXISTS "${OpenCV_CONFIG_PATH}/${OpenCV_ARCH}/${OpenCV_RUNTIME}/staticlib/OpenCVConfig.cmake")
    if(OpenCV_CUDA AND EXISTS "${OpenCV_CONFIG_PATH}/gpu/${OpenCV_ARCH}/${OpenCV_RUNTIME}/staticlib/OpenCVConfig.cmake")
      set(OpenCV_LIB_PATH "${OpenCV_CONFIG_PATH}/gpu/${OpenCV_ARCH}/${OpenCV_RUNTIME}/staticlib")
    else()
      set(OpenCV_LIB_PATH "${OpenCV_CONFIG_PATH}/${OpenCV_ARCH}/${OpenCV_RUNTIME}/staticlib")
    endif()
  elseif(EXISTS "${OpenCV_CONFIG_PATH}/${OpenCV_ARCH}/${OpenCV_RUNTIME}/lib/OpenCVConfig.cmake")
    if(OpenCV_CUDA AND EXISTS "${OpenCV_CONFIG_PATH}/gpu/${OpenCV_ARCH}/${OpenCV_RUNTIME}/lib/OpenCVConfig.cmake")
      set(OpenCV_LIB_PATH "${OpenCV_CONFIG_PATH}/gpu/${OpenCV_ARCH}/${OpenCV_RUNTIME}/lib")
    else()
      set(OpenCV_LIB_PATH "${OpenCV_CONFIG_PATH}/${OpenCV_ARCH}/${OpenCV_RUNTIME}/lib")
    endif()
  endif()
endif()

if(OpenCV_LIB_PATH AND EXISTS "${OpenCV_LIB_PATH}/OpenCVConfig.cmake")
  set(OpenCV_LIB_DIR_OPT "${OpenCV_LIB_PATH}" CACHE PATH "Path where release OpenCV libraries are located" FORCE)
  set(OpenCV_LIB_DIR_DBG "${OpenCV_LIB_PATH}" CACHE PATH "Path where debug OpenCV libraries are located" FORCE)
  set(OpenCV_3RDPARTY_LIB_DIR_OPT "${OpenCV_LIB_PATH}" CACHE PATH "Path where release 3rdparty OpenCV dependencies are located" FORCE)
  set(OpenCV_3RDPARTY_LIB_DIR_DBG "${OpenCV_LIB_PATH}" CACHE PATH "Path where debug 3rdparty OpenCV dependencies are located" FORCE)

  include("${OpenCV_LIB_PATH}/OpenCVConfig.cmake")

  if(OpenCV_CUDA)
    set(_OpenCV_LIBS "")
    foreach(_lib ${OpenCV_LIBS})
      string(REPLACE "${OpenCV_CONFIG_PATH}/gpu/${OpenCV_ARCH}/${OpenCV_RUNTIME}" "${OpenCV_CONFIG_PATH}/${OpenCV_ARCH}/${OpenCV_RUNTIME}" _lib2 "${_lib}")
      if(NOT EXISTS "${_lib}" AND EXISTS "${_lib2}")
        list(APPEND _OpenCV_LIBS "${_lib2}")
      else()
        list(APPEND _OpenCV_LIBS "${_lib}")
      endif()
    endforeach()
    set(OpenCV_LIBS ${_OpenCV_LIBS})
  endif()
  set(OpenCV_FOUND TRUE CACHE BOOL "" FORCE)
  set(OPENCV_FOUND TRUE CACHE BOOL "" FORCE)

  if(NOT OpenCV_FIND_QUIETLY)
    message(STATUS "Found OpenCV ${OpenCV_VERSION} in ${OpenCV_LIB_PATH}")
    if(NOT OpenCV_LIB_PATH MATCHES "/staticlib")
      get_filename_component(_OpenCV_LIB_PATH "${OpenCV_LIB_PATH}/../bin" ABSOLUTE)
      file(TO_NATIVE_PATH "${_OpenCV_LIB_PATH}" _OpenCV_LIB_PATH)
      message(STATUS "You might need to add ${_OpenCV_LIB_PATH} to your PATH to be able to run your applications.")
      if(OpenCV_LIB_PATH MATCHES "/gpu/")
        string(REPLACE "\\gpu" "" _OpenCV_LIB_PATH2 "${_OpenCV_LIB_PATH}")
        message(STATUS "GPU support is enabled so you might also need ${_OpenCV_LIB_PATH2} in your PATH (it must go after the ${_OpenCV_LIB_PATH}).")
      endif()
    endif()
  endif()
else()
  if(NOT OpenCV_FIND_QUIETLY)
    message(WARNING "Found OpenCV 2.4.0 Windows Super Pack but it has no binaries compatible with your configuration.
You should manually point the CMake variable OpenCV_DIR to your build of the OpenCV library.")
  endif()
  set(OpenCV_FOUND FALSE CACHE BOOL "" FORCE)
  set(OPENCV_FOUND FALSE CACHE BOOL "" FORCE)
endif()
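The usage notes at the top of this generated file map onto a very small consumer project. A minimal sketch, assuming a hypothetical project named opencv_demo with a single main.cpp (both names are illustrative, not part of this commit):

    cmake_minimum_required(VERSION 2.8)
    project(opencv_demo)

    # Point OpenCV_DIR at the directory holding OpenCVConfig.cmake if it is not found automatically.
    find_package(OpenCV REQUIRED)

    include_directories(${OpenCV_INCLUDE_DIRS})
    add_executable(opencv_demo main.cpp)
    target_link_libraries(opencv_demo ${OpenCV_LIBS})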
@@ -2,7 +2,10 @@ if(ANDROID)
add_subdirectory("${OpenCV_SOURCE_DIR}/3rdparty/tbb")
include_directories(SYSTEM ${TBB_INCLUDE_DIRS})
set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} tbb)
add_definitions(-DTBB_USE_GCC_BUILTINS=1 -D__TBB_GCC_BUILTIN_ATOMICS_PRESENT=1 -D__TBB_USE_GENERIC_DWORD_LOAD_STORE=1)
add_definitions(-DTBB_USE_GCC_BUILTINS=1 -D__TBB_GCC_BUILTIN_ATOMICS_PRESENT=1)
if(tbb_need_GENERIC_DWORD_LOAD_STORE)
add_definitions(-D__TBB_USE_GENERIC_DWORD_LOAD_STORE=1)
endif()
set(HAVE_TBB 1)
elseif(UNIX AND NOT APPLE)
PKG_CHECK_MODULES(TBB tbb)
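The if(tbb_need_GENERIC_DWORD_LOAD_STORE) check above consumes the flag that 3rdparty/tbb/CMakeLists.txt publishes with PARENT_SCOPE (see the first hunk). A minimal sketch of that handoff mechanism, with illustrative names:

    # sub/CMakeLists.txt: the child publishes a flag into the parent scope
    set(need_workaround ON PARENT_SCOPE)  # not visible in the child's own scope

    # parent CMakeLists.txt: the flag is defined here once the subdirectory has run
    add_subdirectory(sub)
    if(need_workaround)
      add_definitions(-DUSE_WORKAROUND=1)
    endif()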
cmake/OpenCVModule.cmake

@@ -72,17 +72,13 @@ macro(ocv_add_dependencies full_modname)
list(APPEND ${__depsvar} "${d}")
endif()
endforeach()
unset(__depsvar)

ocv_list_unique(OPENCV_MODULE_${full_modname}_REQ_DEPS)
ocv_list_unique(OPENCV_MODULE_${full_modname}_OPT_DEPS)

if(OPENCV_MODULE_${full_modname}_REQ_DEPS)
list(REMOVE_DUPLICATES OPENCV_MODULE_${full_modname}_REQ_DEPS)
endif()
if(OPENCV_MODULE_${full_modname}_OPT_DEPS)
list(REMOVE_DUPLICATES OPENCV_MODULE_${full_modname}_OPT_DEPS)
endif()
set(OPENCV_MODULE_${full_modname}_REQ_DEPS ${OPENCV_MODULE_${full_modname}_REQ_DEPS} CACHE INTERNAL "Required dependencies of ${full_modname} module")
set(OPENCV_MODULE_${full_modname}_OPT_DEPS ${OPENCV_MODULE_${full_modname}_OPT_DEPS} CACHE INTERNAL "Optional dependencies of ${full_modname} module")

unset(__depsvar)
endmacro()

# declare new OpenCV module in current folder
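ocv_list_unique replaces the guarded list(REMOVE_DUPLICATES ...) blocks that this hunk deletes. Its definition is not shown in this commit; a plausible sketch, inferred from the pattern it replaces (REMOVE_DUPLICATES fails on an empty or undefined list, hence the guard):

    macro(ocv_list_unique __lst)
      if(${__lst})
        list(REMOVE_DUPLICATES ${__lst})
      endif()
    endmacro()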
@@ -105,19 +101,22 @@ macro(ocv_add_module _name)
")
endif()

#remember module details
if(NOT DEFINED the_description)
set(the_description "The ${name} OpenCV module")
endif()
set(OPENCV_MODULE_${the_module}_DESCRIPTION "${the_description}" CACHE INTERNAL "Brief description of ${the_module} module")
set(OPENCV_MODULE_${the_module}_LOCATION "${CMAKE_CURRENT_SOURCE_DIR}" CACHE INTERNAL "Location of ${the_module} module sources")

#create option to enable/disable this module
if(NOT DEFINED BUILD_${the_module}_INIT)
set(BUILD_${the_module}_INIT ON)
endif()

# create option to enable/disable this module
option(BUILD_${the_module} "Include ${the_module} module into the OpenCV build" ${BUILD_${the_module}_INIT})

# remember the module details
set(OPENCV_MODULE_${the_module}_DESCRIPTION "${the_description}" CACHE INTERNAL "Brief description of ${the_module} module")
set(OPENCV_MODULE_${the_module}_LOCATION "${CMAKE_CURRENT_SOURCE_DIR}" CACHE INTERNAL "Location of ${the_module} module sources")

# parse list of dependencies
if("${ARGV1}" STREQUAL "INTERNAL" OR "${ARGV1}" STREQUAL "BINDINGS")
set(OPENCV_MODULE_${the_module}_CLASS "${ARGV1}" CACHE INTERNAL "The category of the module")
set(__ocv_argn__ ${ARGN})
@@ -143,28 +142,19 @@ macro(ocv_add_module _name)
set(OPENCV_MODULES_DISABLED_USER ${OPENCV_MODULES_DISABLED_USER} "${the_module}" CACHE INTERNAL "List of OpenCV modules explicitly disabled by user")
endif()

#TODO: add submodules if any
# TODO: add submodules if any

#stop processing of current file
# stop processing of current file
return()
else(OPENCV_INITIAL_PASS)
if(NOT BUILD_${the_module})
#extra protection from redefinition
return()
return() # extra protection from redefinition
endif()
project(${the_module})
endif(OPENCV_INITIAL_PASS)
endmacro()

# Internal macro; disables OpenCV module
# ocv_module_turn_off(<module name>)
macro(__ocv_module_turn_off the_module)
list(APPEND OPENCV_MODULES_DISABLED_AUTO "${the_module}")
list(REMOVE_ITEM OPENCV_MODULES_BUILD "${the_module}")
list(REMOVE_ITEM OPENCV_MODULES_PUBLIC "${the_module}")
set(HAVE_${the_module} OFF CACHE INTERNAL "Module ${the_module} can not be built in current configuration")
endmacro()

# excludes module from current configuration
macro(ocv_module_disable module)
set(__modname ${module})
if(NOT __modname MATCHES "^opencv_")
@@ -175,41 +165,46 @@ macro(ocv_module_disable module)
set(OPENCV_MODULE_${__modname}_LOCATION "${CMAKE_CURRENT_SOURCE_DIR}" CACHE INTERNAL "Location of ${__modname} module sources")
set(OPENCV_MODULES_DISABLED_FORCE "${OPENCV_MODULES_DISABLED_FORCE}" CACHE INTERNAL "List of OpenCV modules which can not be built in current configuration")
unset(__modname)
return()#leave the current folder
return() # leave the current folder
endmacro()


# Internal macro; partly disables OpenCV module
macro(__ocv_module_turn_off the_module)
list(APPEND OPENCV_MODULES_DISABLED_AUTO "${the_module}")
list(REMOVE_ITEM OPENCV_MODULES_BUILD "${the_module}")
list(REMOVE_ITEM OPENCV_MODULES_PUBLIC "${the_module}")
set(HAVE_${the_module} OFF CACHE INTERNAL "Module ${the_module} can not be built in current configuration")
endmacro()

# Internal macro for dependencies tracking
macro(__ocv_flatten_module_required_dependencies the_module)
set(__flattened_deps "")
set(__resolved_deps "")
set(__req_depends ${OPENCV_MODULE_${the_module}_REQ_DEPS})

while(__req_depends)
list(GET __req_depends 0 __dep)
list(REMOVE_AT __req_depends 0)
ocv_list_pop_front(__req_depends __dep)
if(__dep STREQUAL the_module)
#TODO: think how to deal with cyclic dependency
__ocv_module_turn_off(${the_module})
__ocv_module_turn_off(${the_module}) # TODO: think how to deal with cyclic dependency
break()
elseif("${OPENCV_MODULES_DISABLED_USER};${OPENCV_MODULES_DISABLED_AUTO}" MATCHES "(^|;)${__dep}(;|$)")
#depends on disabled module
__ocv_module_turn_off(${the_module})
elseif(";${OPENCV_MODULES_DISABLED_USER};${OPENCV_MODULES_DISABLED_AUTO};" MATCHES ";${__dep};")
__ocv_module_turn_off(${the_module}) # depends on disabled module
break()
elseif("${OPENCV_MODULES_BUILD}" MATCHES "(^|;)${__dep}(;|$)")
if(__resolved_deps MATCHES "(^|;)${__dep}(;|$)")
#all dependencies of this module are already resolved
list(APPEND __flattened_deps "${__dep}")
elseif(";${OPENCV_MODULES_BUILD};" MATCHES ";${__dep};")
if(";${__resolved_deps};" MATCHES ";${__dep};")
list(APPEND __flattened_deps "${__dep}") # all dependencies of this module are already resolved
else()
#put all required subdependencies before this dependency and mark it as resolved
# put all required subdependencies before this dependency and mark it as resolved
list(APPEND __resolved_deps "${__dep}")
list(INSERT __req_depends 0 ${OPENCV_MODULE_${__dep}_REQ_DEPS} ${__dep})
endif()
elseif(__dep MATCHES "^opencv_")
#depends on missing module
__ocv_module_turn_off(${the_module})
__ocv_module_turn_off(${the_module}) # depends on missing module
message(WARNING "Unknown \"${__dep}\" module is listed in the dependencies of \"${the_module}\" module")
break()
else()
#skip non-modules
# skip non-modules
endif()
endwhile()
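The rewritten conditions swap the (^|;)${__dep}(;|$) regex for semicolon padding: wrapping both the list and the candidate in ";" turns whole-element membership into a plain substring test and sidesteps regex metacharacters in module names. A small illustration with made-up values:

    set(__list "opencv_core;opencv_imgproc")
    # true: ";opencv_core;" occurs as a whole element
    if(";${__list};" MATCHES ";opencv_core;")
    endif()
    # false: "core" alone is not a complete element of the list
    if(";${__list};" MATCHES ";core;")
    endif()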
@@ -220,37 +215,33 @@ macro(__ocv_flatten_module_required_dependencies the_module)
set(OPENCV_MODULE_${the_module}_DEPS "")
endif()

unset(__resolved_deps)
unset(__flattened_deps)
unset(__req_depends)
unset(__dep)
ocv_clear_vars(__resolved_deps __flattened_deps __req_depends __dep)
endmacro()

# Internal macro for dependencies tracking
macro(__ocv_flatten_module_optional_dependencies the_module)
set(__flattened_deps ${OPENCV_MODULE_${the_module}_DEPS})
set(__resolved_deps ${OPENCV_MODULE_${the_module}_DEPS})
set(__opt_depends ${OPENCV_MODULE_${the_module}_OPT_DEPS})
set(__flattened_deps "")
set(__resolved_deps "")
set(__opt_depends ${OPENCV_MODULE_${the_module}_REQ_DEPS} ${OPENCV_MODULE_${the_module}_OPT_DEPS})

while(__opt_depends)
list(GET __opt_depends 0 __dep)
list(REMOVE_AT __opt_depends 0)
ocv_list_pop_front(__opt_depends __dep)
if(__dep STREQUAL the_module)
#TODO: think how to deal with cyclic dependency
__ocv_module_turn_off(${the_module})
__ocv_module_turn_off(${the_module}) # TODO: think how to deal with cyclic dependency
break()
elseif("${OPENCV_MODULES_BUILD}" MATCHES "(^|;)${__dep}(;|$)")
if(__resolved_deps MATCHES "(^|;)${__dep}(;|$)")
#all dependencies of this module are already resolved
list(APPEND __flattened_deps "${__dep}")
elseif(";${OPENCV_MODULES_BUILD};" MATCHES ";${__dep};")
if(";${__resolved_deps};" MATCHES ";${__dep};")
list(APPEND __flattened_deps "${__dep}") # all dependencies of this module are already resolved
else()
#put all subdependencies before this dependency and mark it as resolved
# put all subdependencies before this dependency and mark it as resolved
list(APPEND __resolved_deps "${__dep}")
list(INSERT __opt_depends 0 ${OPENCV_MODULE_${__dep}_REQ_DEPS} ${OPENCV_MODULE_${__dep}_OPT_DEPS} ${__dep})
endif()
else()
#skip non-modules or missing modules
# skip non-modules or missing modules
endif()
endwhile()

if(__flattened_deps)
list(REMOVE_DUPLICATES __flattened_deps)
set(OPENCV_MODULE_${the_module}_DEPS ${__flattened_deps})
@@ -258,10 +249,7 @@ macro(__ocv_flatten_module_optional_dependencies the_module)
set(OPENCV_MODULE_${the_module}_DEPS "")
endif()

unset(__resolved_deps)
unset(__flattened_deps)
unset(__opt_depends)
unset(__dep)
ocv_clear_vars(__resolved_deps __flattened_deps __opt_depends __dep)
endmacro()

macro(__ocv_flatten_module_dependencies)
@@ -269,16 +257,16 @@ macro(__ocv_flatten_module_dependencies)
set(HAVE_${m} OFF CACHE INTERNAL "Module ${m} will not be built in current configuration")
endforeach()
foreach(m ${OPENCV_MODULES_BUILD})
set(HAVE_${m} ON CACHE INTERNAL "Module ${m} will not be built in current configuration")
set(HAVE_${m} ON CACHE INTERNAL "Module ${m} will be built in current configuration")
__ocv_flatten_module_required_dependencies(${m})
endforeach()

foreach(m ${OPENCV_MODULES_BUILD})
__ocv_flatten_module_optional_dependencies(${m})

#dependencies from other modules
# save dependencies from other modules
set(OPENCV_MODULE_${m}_DEPS ${OPENCV_MODULE_${m}_DEPS} CACHE INTERNAL "Flattened dependencies of ${m} module")
#extra dependencies
# save extra dependencies
set(OPENCV_MODULE_${m}_DEPS_EXT ${OPENCV_MODULE_${m}_REQ_DEPS} ${OPENCV_MODULE_${m}_OPT_DEPS})
if(OPENCV_MODULE_${m}_DEPS_EXT AND OPENCV_MODULE_${m}_DEPS)
list(REMOVE_ITEM OPENCV_MODULE_${m}_DEPS_EXT ${OPENCV_MODULE_${m}_DEPS})
@@ -287,8 +275,15 @@ macro(__ocv_flatten_module_dependencies)
set(OPENCV_MODULE_${m}_DEPS_EXT ${OPENCV_MODULE_${m}_DEPS_EXT} CACHE INTERNAL "Extra dependencies of ${m} module")
endforeach()

# order modules by dependencies
set(OPENCV_MODULES_BUILD_ "")
foreach(m ${OPENCV_MODULES_BUILD})
list(APPEND OPENCV_MODULES_BUILD_ ${OPENCV_MODULE_${m}_DEPS} ${m})
endforeach()
ocv_list_unique(OPENCV_MODULES_BUILD_)

set(OPENCV_MODULES_PUBLIC ${OPENCV_MODULES_PUBLIC} CACHE INTERNAL "List of OpenCV modules marked for export")
set(OPENCV_MODULES_BUILD ${OPENCV_MODULES_BUILD} CACHE INTERNAL "List of OpenCV modules included into the build")
set(OPENCV_MODULES_BUILD ${OPENCV_MODULES_BUILD_} CACHE INTERNAL "List of OpenCV modules included into the build")
set(OPENCV_MODULES_DISABLED_AUTO ${OPENCV_MODULES_DISABLED_AUTO} CACHE INTERNAL "List of OpenCV modules implicitly disabled due to dependencies")
endmacro()
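Because every module's _DEPS list is already fully flattened at this point, appending "deps + module" for each module and deduplicating (first occurrence wins) leaves each dependency ahead of its dependents. A toy trace with hypothetical modules:

    # Given:  OPENCV_MODULE_opencv_imgproc_DEPS    = opencv_core
    #         OPENCV_MODULE_opencv_features2d_DEPS = opencv_core;opencv_imgproc
    # After the foreach() above:
    #   opencv_core;opencv_imgproc;opencv_core;opencv_imgproc;opencv_features2d
    # After ocv_list_unique (first occurrences kept):
    #   opencv_core;opencv_imgproc;opencv_features2d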
@@ -300,10 +295,11 @@ macro(ocv_glob_modules)
endif()
set(__directories_observed "")

#collect modules
# collect modules
set(OPENCV_INITIAL_PASS ON)
foreach(__path ${ARGN})
ocv_get_real_path(__path "${__path}")

list(FIND __directories_observed "${__path}" __pathIdx)
if(__pathIdx GREATER -1)
message(FATAL_ERROR "The directory ${__path} is observed for OpenCV modules a second time.")
@@ -316,37 +312,36 @@ macro(ocv_glob_modules)
foreach(mod ${__ocvmodules})
ocv_get_real_path(__modpath "${__path}/${mod}")
if(EXISTS "${__modpath}/CMakeLists.txt")

list(FIND __directories_observed "${__modpath}" __pathIdx)
if(__pathIdx GREATER -1)
message(FATAL_ERROR "The module from ${__modpath} is already loaded.")
endif()
list(APPEND __directories_observed "${__modpath}")

add_subdirectory("${__modpath}" "${CMAKE_CURRENT_BINARY_DIR}/${mod}/.${mod}")
if(OCV_MODULE_RELOCATE_ON_INITIAL_PASS)
file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${mod}/.${mod}")
file(COPY "${__modpath}/CMakeLists.txt" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/${mod}/.${mod}")
add_subdirectory("${CMAKE_CURRENT_BINARY_DIR}/${mod}/.${mod}" "${CMAKE_CURRENT_BINARY_DIR}/${mod}/.${mod}")
if("${OPENCV_MODULE_opencv_${mod}_LOCATION}" STREQUAL "${CMAKE_CURRENT_BINARY_DIR}/${mod}/.${mod}")
set(OPENCV_MODULE_opencv_${mod}_LOCATION "${__modpath}" CACHE PATH "" FORCE)
endif()
else()
add_subdirectory("${__modpath}" "${CMAKE_CURRENT_BINARY_DIR}/${mod}/.${mod}")
endif()
endif()
endforeach()
endif()
endforeach()
unset(__ocvmodules)
unset(__directories_observed)
unset(__path)
unset(__modpath)
unset(__pathIdx)
ocv_clear_vars(__ocvmodules __directories_observed __path __modpath __pathIdx)

#resolve dependencies
# resolve dependencies
__ocv_flatten_module_dependencies()

#order modules by dependencies
set(OPENCV_MODULES_BUILD_ "")
foreach(m ${OPENCV_MODULES_BUILD})
list(APPEND OPENCV_MODULES_BUILD_ ${OPENCV_MODULE_${m}_DEPS} ${m})
endforeach()
ocv_list_unique(OPENCV_MODULES_BUILD_)

#create modules
# create modules
set(OPENCV_INITIAL_PASS OFF PARENT_SCOPE)
set(OPENCV_INITIAL_PASS OFF)
foreach(m ${OPENCV_MODULES_BUILD_})
foreach(m ${OPENCV_MODULES_BUILD})
if(m MATCHES "^opencv_")
string(REGEX REPLACE "^opencv_" "" __shortname "${m}")
add_subdirectory("${OPENCV_MODULE_${m}_LOCATION}" "${CMAKE_CURRENT_BINARY_DIR}/${__shortname}")
@@ -389,7 +384,7 @@ endmacro()
macro(ocv_module_include_directories)
ocv_include_directories("${OPENCV_MODULE_${the_module}_LOCATION}/include"
"${OPENCV_MODULE_${the_module}_LOCATION}/src"
"${CMAKE_CURRENT_BINARY_DIR}"#for precompiled headers
"${CMAKE_CURRENT_BINARY_DIR}" # for precompiled headers
)
ocv_include_modules(${OPENCV_MODULE_${the_module}_DEPS} ${ARGN})
endmacro()
@@ -417,7 +412,7 @@ macro(ocv_set_module_sources)
endif()

# use full paths for module to be independent from the module location
ocv_to_full_paths(OPENCV_MODULE_${the_module}_HEADERS)
ocv_convert_to_full_paths(OPENCV_MODULE_${the_module}_HEADERS)

set(OPENCV_MODULE_${the_module}_HEADERS ${OPENCV_MODULE_${the_module}_HEADERS} CACHE INTERNAL "List of header files for ${the_module}")
set(OPENCV_MODULE_${the_module}_SOURCES ${OPENCV_MODULE_${the_module}_SOURCES} CACHE INTERNAL "List of source files for ${the_module}")
@@ -446,9 +441,11 @@ endmacro()
# ocv_create_module(SKIP_LINK)
macro(ocv_create_module)
add_library(${the_module} ${OPENCV_MODULE_TYPE} ${OPENCV_MODULE_${the_module}_HEADERS} ${OPENCV_MODULE_${the_module}_SOURCES})

if(NOT "${ARGN}" STREQUAL "SKIP_LINK")
target_link_libraries(${the_module} ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_MODULE_${the_module}_DEPS_EXT} ${OPENCV_LINKER_LIBS} ${IPP_LIBS} ${ARGN})
endif()

add_dependencies(opencv_modules ${the_module})

if(ENABLE_SOLUTION_FOLDERS)
@@ -485,7 +482,7 @@ macro(ocv_create_module)
if(CMAKE_CROSSCOMPILING)
set_target_properties(${the_module} PROPERTIES LINK_FLAGS "/NODEFAULTLIB:secchk")
endif()
set_target_properties(${the_module} PROPERTIES LINK_FLAGS "/NODEFAULTLIB:libc /DEBUG")
endif()

install(TARGETS ${the_module}
@@ -495,7 +492,7 @@ macro(ocv_create_module)
)

# only "public" headers need to be installed
if(OPENCV_MODULE_${the_module}_HEADERS AND OPENCV_MODULES_PUBLIC MATCHES "(^|;)${the_module}(;|$)")
if(OPENCV_MODULE_${the_module}_HEADERS AND ";${OPENCV_MODULES_PUBLIC};" MATCHES ";${the_module};")
foreach(hdr ${OPENCV_MODULE_${the_module}_HEADERS})
string(REGEX REPLACE "^.*opencv2/" "opencv2/" hdr2 "${hdr}")
if(hdr2 MATCHES "^(opencv2/.*)/[^/]+.h(..)?$")
@@ -510,30 +507,17 @@ endmacro()
# Usage:
# ocv_add_precompiled_headers(${the_module})
macro(ocv_add_precompiled_headers the_target)
if("${the_target}" MATCHES "^opencv_test_.*$")
SET(pch_path "test/test_")
elseif("${the_target}" MATCHES "opencv_perf_gpu_cpu")
SET(pch_path "perf_cpu/perf_cpu_")
elseif("${the_target}" MATCHES "^opencv_perf_.*$")
SET(pch_path "perf/perf_")
else()
SET(pch_path "src/")
endif()
set(pch_header "${CMAKE_CURRENT_SOURCE_DIR}/${pch_path}precomp.hpp")

if(PCHSupport_FOUND AND ENABLE_PRECOMPILED_HEADERS AND EXISTS "${pch_header}")
if(CMAKE_GENERATOR MATCHES Visual)
set(${the_target}_pch "${CMAKE_CURRENT_SOURCE_DIR}/${pch_path}precomp.cpp")
add_native_precompiled_header(${the_target} ${pch_header})
elseif(CMAKE_GENERATOR MATCHES Xcode)
add_native_precompiled_header(${the_target} ${pch_header})
elseif(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_GENERATOR MATCHES "Makefiles|Ninja")
add_precompiled_header(${the_target} ${pch_header})
endif()
endif()
unset(pch_header)
unset(pch_path)
unset(${the_target}_pch)
SET(pch_path "perf_cpu/perf_cpu_")
elseif("${the_target}" MATCHES "^opencv_perf_.*$")
SET(pch_path "perf/perf_")
else()
SET(pch_path "src/")
endif()
ocv_add_precompiled_header_to_target(${the_target} "${CMAKE_CURRENT_SOURCE_DIR}/${pch_path}precomp.hpp")
unset(pch_path)
endmacro()

# short command for adding simple OpenCV module
@@ -563,7 +547,7 @@ macro(ocv_check_dependencies)
endforeach()
endmacro()

#auxiliary macro to parse arguments of ocv_add_accuracy_tests and ocv_add_perf_tests commands
# auxiliary macro to parse arguments of ocv_add_accuracy_tests and ocv_add_perf_tests commands
macro(__ocv_parse_test_sources tests_type)
set(OPENCV_${tests_type}_${the_module}_SOURCES "")
set(OPENCV_${tests_type}_${the_module}_DEPS "")
@@ -593,7 +577,7 @@ endmacro()

# this is a command for adding OpenCV performance tests to the module
# ocv_add_perf_tests(<extra_dependencies>)
macro(ocv_add_perf_tests)
function(ocv_add_perf_tests)
set(perf_path "${CMAKE_CURRENT_SOURCE_DIR}/perf")
if(BUILD_PERF_TESTS AND EXISTS "${perf_path}")
__ocv_parse_test_sources(PERF ${ARGN})
@@ -604,7 +588,7 @@ macro(ocv_add_perf_tests)

if(OCV_DEPENDENCIES_FOUND)
set(the_target "opencv_perf_${name}")
#project(${the_target})
# project(${the_target})

ocv_module_include_directories(${perf_deps} "${perf_path}")
@@ -636,14 +620,14 @@ macro(ocv_add_perf_tests)
add_dependencies(perf ${the_target})
endif()
else(OCV_DEPENDENCIES_FOUND)
#TODO: warn about unsatisfied dependencies
# TODO: warn about unsatisfied dependencies
endif(OCV_DEPENDENCIES_FOUND)
endif()
endmacro()
endfunction()

# this is a command for adding OpenCV accuracy/regression tests to the module
# ocv_add_accuracy_tests([FILES <source group name> <list of sources>] [DEPENDS_ON] <list of extra dependencies>)
macro(ocv_add_accuracy_tests)
function(ocv_add_accuracy_tests)
set(test_path "${CMAKE_CURRENT_SOURCE_DIR}/test")
ocv_check_dependencies(${test_deps})
if(BUILD_TESTS AND EXISTS "${test_path}")
@@ -655,7 +639,7 @@ macro(ocv_add_accuracy_tests)

if(OCV_DEPENDENCIES_FOUND)
set(the_target "opencv_test_${name}")
#project(${the_target})
# project(${the_target})

ocv_module_include_directories(${test_deps} "${test_path}")
@@ -687,12 +671,12 @@ macro(ocv_add_accuracy_tests)

ocv_add_precompiled_headers(${the_target})
else(OCV_DEPENDENCIES_FOUND)
#TODO: warn about unsatisfied dependencies
# TODO: warn about unsatisfied dependencies
endif(OCV_DEPENDENCIES_FOUND)
endif()
endmacro()
endfunction()

# internal macro; finds all link dependencies of module
# internal macro; finds all link dependencies of the module
# should be used at the end of CMake processing
macro(__ocv_track_module_link_dependencies the_module optkind)
set(${the_module}_MODULE_DEPS_${optkind} "")
@@ -742,7 +726,7 @@ macro(__ocv_track_module_link_dependencies the_module optkind)
#ocv_list_reverse(${the_module}_EXTRA_DEPS_${optkind})

if(__has_cycle)
#not sure if it can work
# not sure if it can work
list(APPEND ${the_module}_MODULE_DEPS_${optkind} "${the_module}")
endif()
@@ -754,13 +738,13 @@ macro(__ocv_track_module_link_dependencies the_module optkind)
endif()#STATIC_LIBRARY
unset(__module_type)

#message("${the_module}_MODULE_DEPS_${optkind}")
#message(" ${${the_module}_MODULE_DEPS_${optkind}}")
#message(" ${OPENCV_MODULE_${the_module}_DEPS}")
#message("")
#message("${the_module}_EXTRA_DEPS_${optkind}")
#message(" ${${the_module}_EXTRA_DEPS_${optkind}}")
#message("")
endmacro()

# creates lists of build dependencies needed for external projects
cmake/OpenCVPCHSupport.cmake

@@ -1,4 +1,4 @@
# taken from http://www.vtk.org/Bug/view.php?id=1260 and slightly adjusted
# taken from http://public.kitware.com/Bug/view.php?id=1260 and slightly adjusted

# - Try to find precompiled headers support for GCC 3.4 and 4.x
# Once done this will define:
@@ -26,8 +26,8 @@ IF(CMAKE_COMPILER_IS_GNUCXX)
SET(_PCH_include_prefix "-I")
SET(_PCH_isystem_prefix "-isystem")

ELSEIF(WIN32)
SET(PCHSupport_FOUND TRUE) # for experimental msvc support
ELSEIF(CMAKE_GENERATOR MATCHES "^Visual.*$")
SET(PCHSupport_FOUND TRUE)
SET(_PCH_include_prefix "/I")
SET(_PCH_isystem_prefix "/I")
ELSE()
@@ -324,3 +324,17 @@ MACRO(ADD_NATIVE_PRECOMPILED_HEADER _targetName _input)
endif()

ENDMACRO(ADD_NATIVE_PRECOMPILED_HEADER)

macro(ocv_add_precompiled_header_to_target the_target pch_header)
if(PCHSupport_FOUND AND ENABLE_PRECOMPILED_HEADERS AND EXISTS "${pch_header}")
if(CMAKE_GENERATOR MATCHES Visual)
string(REGEX REPLACE "hpp$" "cpp" ${the_target}_pch "${pch_header}")
add_native_precompiled_header(${the_target} ${pch_header})
unset(${the_target}_pch)
elseif(CMAKE_GENERATOR MATCHES Xcode)
add_native_precompiled_header(${the_target} ${pch_header})
elseif(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_GENERATOR MATCHES "Makefiles|Ninja")
add_precompiled_header(${the_target} ${pch_header})
endif()
endif()
endmacro()
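A hypothetical call site for the new helper (target name and header path are illustrative):

    # Attach a module's precompiled header to an already-created target.
    ocv_add_precompiled_header_to_target(opencv_core
        "${CMAKE_CURRENT_SOURCE_DIR}/src/precomp.hpp")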
cmake/OpenCVUtils.cmake

@@ -384,6 +384,17 @@ macro(ocv_list_add_suffix LST SUFFIX)
endmacro()


# gets and removes the first element from list
macro(ocv_list_pop_front LST VAR)
if(${LST})
list(GET ${LST} 0 ${VAR})
list(REMOVE_AT ${LST} 0)
else()
set(${VAR} "")
endif()
endmacro()
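Illustrative use of the new list helper (values are made up):

    set(__queue "a;b;c")
    ocv_list_pop_front(__queue __head)  # __head == "a", __queue == "b;c"
    ocv_list_pop_front(__queue __head)  # __head == "b", __queue == "c"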
# simple regex escaping routine (does not cover all cases!!!)
macro(ocv_regex_escape var regex)
string(REGEX REPLACE "([+.*^$])" "\\\\1" ${var} "${regex}")
@@ -401,7 +412,7 @@ endmacro()


# convert list of paths to full paths
macro(ocv_to_full_paths VAR)
macro(ocv_convert_to_full_paths VAR)
if(${VAR})
set(__tmp "")
foreach(path ${${VAR}})
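A sketch of the renamed path helper in use (paths are made up):

    set(__hdrs "include/foo.hpp;include/bar.hpp")
    ocv_convert_to_full_paths(__hdrs)  # entries become absolute paths, so the module location no longer matters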
doc/tutorials/introduction/android_binary_package (Android development tutorial)

@@ -102,7 +102,7 @@ You need the following tools to be installed:
#. **Eclipse IDE**

   Check the `Android SDK System Requirements <http://developer.android.com/sdk/requirements.html>`_ document for a list of Eclipse versions that are compatible with the Android SDK.
   For OpenCV 2.4.0 we recommend Eclipse 3.7 (Indigo) or later versions. They work well for OpenCV under both Windows and Linux.
   For OpenCV 2.4.x we recommend Eclipse 3.7 (Indigo) or later versions. They work well for OpenCV under both Windows and Linux.

   If you do not have Eclipse installed, you can get it from the `download page <http://www.eclipse.org/downloads/>`_.
@@ -154,12 +154,12 @@ Get the OpenCV package for Android development

.. code-block:: bash

   tar -jxvf ~/Downloads/OpenCV-2.4.0-android-bin.tar.bz2
   tar -jxvf ~/Downloads/OpenCV-2.4.1-android-bin2.tar.bz2

For this tutorial I have unpacked OpenCV to the :file:`C:\\Work\\android-opencv\\` directory.

.. |opencv_android_bin_pack| replace:: OpenCV-2.4.0-android-bin.tar.bz2
.. _opencv_android_bin_pack_url: http://sourceforge.net/projects/opencvlibrary/files/opencv-android/2.4.0/OpenCV-2.4.0-android-bin.tar.bz2/download
.. |opencv_android_bin_pack| replace:: OpenCV-2.4.1-android-bin2.tar.bz2
.. _opencv_android_bin_pack_url: http://sourceforge.net/projects/opencvlibrary/files/opencv-android/2.4.1/OpenCV-2.4.1-android-bin2.tar.bz2/download
.. |opencv_android_bin_pack_url| replace:: |opencv_android_bin_pack|
.. |seven_zip| replace:: 7-Zip
.. _seven_zip: http://www.7-zip.org/
@@ -214,7 +214,7 @@ Open OpenCV library and samples in Eclipse
   :align: center

* Click :guilabel:`OK` to close preferences dialog.

#. Import OpenCV and samples into workspace.

   OpenCV library is packed as a ready-for-use `Android Library Project
modules/contrib/src/rgbdodometry.cpp

@@ -48,10 +48,12 @@
#include <iostream>

#if defined(HAVE_EIGEN) && EIGEN_WORLD_VERSION == 3
#include <Eigen/Core>
#include <unsupported/Eigen/MatrixFunctions>

#include <Eigen/Dense>
# include <Eigen/Core>
# ifdef ANDROID
template <typename Scalar> Scalar log2(Scalar v) { using std::log; return log(v)/log(Scalar(2)); }
# endif
# include <unsupported/Eigen/MatrixFunctions>
# include <Eigen/Dense>
#endif

#include <limits>
@@ -581,7 +583,7 @@ bool cv::RGBDOdometry( cv::Mat& Rt, const Mat& initRt,
const double fy = levelCameraMatrix.at<double>(1,1);
const double determinantThreshold = 1e-6;

Mat corresps( levelImage0.size(), levelImage0.type(), CV_32SC1 );
Mat corresps( levelImage0.size(), levelImage0.type() );

// Run transformation search on current level iteratively.
for( int iter = 0; iter < (*iterCountsPtr)[level]; iter ++ )
modules/core/include/opencv2/core/core.hpp

@@ -85,7 +85,6 @@ template<typename _Tp, int cn> class CV_EXPORTS Vec;
template<typename _Tp, int m, int n> class CV_EXPORTS Matx;

typedef std::string String;
typedef std::basic_string<wchar_t> WString;

class Mat;
class SparseMat;
@@ -110,8 +109,12 @@ template<typename _Tp> class CV_EXPORTS MatIterator_;
template<typename _Tp> class CV_EXPORTS MatConstIterator_;
template<typename _Tp> class CV_EXPORTS MatCommaInitializer_;

#if !defined(ANDROID) || (defined(_GLIBCXX_USE_WCHAR_T) && _GLIBCXX_USE_WCHAR_T)
typedef std::basic_string<wchar_t> WString;

CV_EXPORTS string fromUtf16(const WString& str);
CV_EXPORTS WString toUtf16(const string& str);
#endif

CV_EXPORTS string format( const char* fmt, ... );
CV_EXPORTS string tempfile( const char* suffix CV_DEFAULT(0));
modules/core/src/persistence.cpp

@@ -151,7 +151,7 @@ cv::string cv::FileStorage::getDefaultObjectName(const string& _filename)

namespace cv
{
#if !defined(ANDROID) || defined(_GLIBCXX_USE_WCHAR_T)
#if !defined(ANDROID) || (defined(_GLIBCXX_USE_WCHAR_T) && _GLIBCXX_USE_WCHAR_T)
string fromUtf16(const WString& str)
{
cv::AutoBuffer<char> _buf(str.size()*4 + 1);
modules/gpu/perf/perf_core.cpp (new file, 1883 lines; diff too large to display)
modules/gpu/src/brute_force_matcher.cpp

@@ -1005,11 +1005,11 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchConvert(const Mat& trainIdx

for (int i = 0; i < nMatches; ++i, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr)
{
int trainIdx = *trainIdx_ptr;
int imgIdx = *imgIdx_ptr;
float distance = *distance_ptr;
int _trainIdx = *trainIdx_ptr;
int _imgIdx = *imgIdx_ptr;
float _distance = *distance_ptr;

DMatch m(queryIdx, trainIdx, imgIdx, distance);
DMatch m(queryIdx, _trainIdx, _imgIdx, _distance);

curMatches.push_back(m);
}
modules/gpu/src/cuda/bf_knnmatch.cu (whitespace/indentation-only changes; duplicated old/new lines rendered identically are shown once)

@@ -45,19 +45,19 @@
#include "opencv2/gpu/device/vec_distance.hpp"
#include "opencv2/gpu/device/datamov_utils.hpp"

namespace cv { namespace gpu { namespace device
{
namespace bf_knnmatch
{
///////////////////////////////////////////////////////////////////////////////
// Reduction

template <int BLOCK_SIZE>
__device__ void findBestMatch(float& bestDistance1, float& bestDistance2,
int& bestTrainIdx1, int& bestTrainIdx2,
float* s_distance, int* s_trainIdx)
{
float myBestDistance1 = numeric_limits<float>::max();
float myBestDistance2 = numeric_limits<float>::max();
int myBestTrainIdx1 = -1;
int myBestTrainIdx2 = -1;
@@ -122,13 +122,13 @@ namespace cv { namespace gpu { namespace device
bestTrainIdx2 = myBestTrainIdx2;
}

template <int BLOCK_SIZE>
__device__ void findBestMatch(float& bestDistance1, float& bestDistance2,
int& bestTrainIdx1, int& bestTrainIdx2,
int& bestImgIdx1, int& bestImgIdx2,
float* s_distance, int* s_trainIdx, int* s_imgIdx)
{
float myBestDistance1 = numeric_limits<float>::max();
float myBestDistance2 = numeric_limits<float>::max();
int myBestTrainIdx1 = -1;
int myBestTrainIdx2 = -1;
@@ -208,7 +208,7 @@ namespace cv { namespace gpu { namespace device
///////////////////////////////////////////////////////////////////////////////
// Match Unrolled Cached

template <int BLOCK_SIZE, int MAX_DESC_LEN, typename T, typename U>
__device__ void loadQueryToSmem(int queryIdx, const DevMem2D_<T>& query, U* s_query)
{
#pragma unroll
@@ -219,11 +219,11 @@ namespace cv { namespace gpu { namespace device
}
}

template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
__device__ void loopUnrolledCached(int queryIdx, const DevMem2D_<T>& query, int imgIdx, const DevMem2D_<T>& train, const Mask& mask,
typename Dist::value_type* s_query, typename Dist::value_type* s_train,
float& bestDistance1, float& bestDistance2,
int& bestTrainIdx1, int& bestTrainIdx2,
int& bestImgIdx1, int& bestImgIdx2)
{
for (int t = 0, endt = (train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE; t < endt; ++t)
@@ -280,7 +280,7 @@ namespace cv { namespace gpu { namespace device
}
}

template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
__global__ void matchUnrolledCached(const DevMem2D_<T> query, const DevMem2D_<T> train, const Mask mask, int2* bestTrainIdx, float2* bestDistance)
{
extern __shared__ int smem[];
@@ -313,9 +313,9 @@ namespace cv { namespace gpu { namespace device
}
}

template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
void matchUnrolledCached(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
const DevMem2D_<int2>& trainIdx, const DevMem2D_<float2>& distance,
cudaStream_t stream)
{
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
@@ -330,7 +330,7 @@ namespace cv { namespace gpu { namespace device
cudaSafeCall( cudaDeviceSynchronize() );
}

template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
__global__ void matchUnrolledCached(const DevMem2D_<T> query, const DevMem2D_<T>* trains, int n, const Mask mask, int2* bestTrainIdx, int2* bestImgIdx, float2* bestDistance)
{
extern __shared__ int smem[];
@@ -374,9 +374,9 @@ namespace cv { namespace gpu { namespace device
}
}

template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
void matchUnrolledCached(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const Mask& mask,
const DevMem2D_<int2>& trainIdx, const DevMem2D_<int2>& imgIdx, const DevMem2D_<float2>& distance,
cudaStream_t stream)
{
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
@@ -394,11 +394,11 @@ namespace cv { namespace gpu { namespace device
///////////////////////////////////////////////////////////////////////////////
// Match Unrolled

template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
__device__ void loopUnrolled(int queryIdx, const DevMem2D_<T>& query, int imgIdx, const DevMem2D_<T>& train, const Mask& mask,
typename Dist::value_type* s_query, typename Dist::value_type* s_train,
float& bestDistance1, float& bestDistance2,
int& bestTrainIdx1, int& bestTrainIdx2,
int& bestImgIdx1, int& bestImgIdx2)
{
for (int t = 0, endt = (train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE; t < endt; ++t)
@@ -459,7 +459,7 @@ namespace cv { namespace gpu { namespace device
}
}

template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
__global__ void matchUnrolled(const DevMem2D_<T> query, const DevMem2D_<T> train, const Mask mask, int2* bestTrainIdx, float2* bestDistance)
{
extern __shared__ int smem[];
@@ -490,9 +490,9 @@ namespace cv { namespace gpu { namespace device
}
}

template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
void matchUnrolled(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
const DevMem2D_<int2>& trainIdx, const DevMem2D_<float2>& distance,
cudaStream_t stream)
{
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
@@ -507,7 +507,7 @@ namespace cv { namespace gpu { namespace device
cudaSafeCall( cudaDeviceSynchronize() );
}

template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
__global__ void matchUnrolled(const DevMem2D_<T> query, const DevMem2D_<T>* trains, int n, const Mask mask, int2* bestTrainIdx, int2* bestImgIdx, float2* bestDistance)
{
extern __shared__ int smem[];
@@ -549,9 +549,9 @@ namespace cv { namespace gpu { namespace device
}
}

template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
void matchUnrolled(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const Mask& mask,
const DevMem2D_<int2>& trainIdx, const DevMem2D_<int2>& imgIdx, const DevMem2D_<float2>& distance,
cudaStream_t stream)
{
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
@@ -569,11 +569,11 @@ namespace cv { namespace gpu { namespace device
///////////////////////////////////////////////////////////////////////////////
// Match

template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
__device__ void loop(int queryIdx, const DevMem2D_<T>& query, int imgIdx, const DevMem2D_<T>& train, const Mask& mask,
typename Dist::value_type* s_query, typename Dist::value_type* s_train,
float& bestDistance1, float& bestDistance2,
int& bestTrainIdx1, int& bestTrainIdx2,
int& bestImgIdx1, int& bestImgIdx2)
{
for (int t = 0, endt = (train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE; t < endt; ++t)
@@ -633,7 +633,7 @@ namespace cv { namespace gpu { namespace device
}
}

template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
__global__ void match(const DevMem2D_<T> query, const DevMem2D_<T> train, const Mask mask, int2* bestTrainIdx, float2* bestDistance)
{
extern __shared__ int smem[];
@@ -664,9 +664,9 @@ namespace cv { namespace gpu { namespace device
}
}

template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
void match(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
const DevMem2D_<int2>& trainIdx, const DevMem2D_<float2>& distance,
cudaStream_t stream)
{
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
@@ -681,7 +681,7 @@ namespace cv { namespace gpu { namespace device
cudaSafeCall( cudaDeviceSynchronize() );
}

template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
__global__ void match(const DevMem2D_<T> query, const DevMem2D_<T>* trains, int n, const Mask mask, int2* bestTrainIdx, int2* bestImgIdx, float2* bestDistance)
{
extern __shared__ int smem[];
@@ -723,9 +723,9 @@ namespace cv { namespace gpu { namespace device
}
}

template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
void match(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const Mask& mask,
const DevMem2D_<int2>& trainIdx, const DevMem2D_<int2>& imgIdx, const DevMem2D_<float2>& distance,
cudaStream_t stream)
{
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
@@ -743,9 +743,9 @@ namespace cv { namespace gpu { namespace device
///////////////////////////////////////////////////////////////////////////////
// knnMatch 2 dispatcher

template <typename Dist, typename T, typename Mask>
void match2Dispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
const DevMem2Db& trainIdx, const DevMem2Db& distance,
int cc, cudaStream_t stream)
{
if (query.cols <= 64)
@@ -761,11 +761,11 @@ namespace cv { namespace gpu { namespace device
matchUnrolled<16, 256, Dist>(query, train, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<float2> > (distance), stream);
}
else if (query.cols <= 512)
{
matchUnrolled<16, 512, Dist>(query, train, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<float2> > (distance), stream);
}
else if (query.cols <= 1024)
{
matchUnrolled<16, 1024, Dist>(query, train, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<float2> > (distance), stream);
}*/
else
@@ -774,9 +774,9 @@ namespace cv { namespace gpu { namespace device
}
}

template <typename Dist, typename T, typename Mask>
void match2Dispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const Mask& mask,
const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
int cc, cudaStream_t stream)
{
if (query.cols <= 64)
@@ -792,11 +792,11 @@ namespace cv { namespace gpu { namespace device
matchUnrolled<16, 256, Dist>(query, trains, n, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<int2> >(imgIdx), static_cast< DevMem2D_<float2> > (distance), stream);
}
else if (query.cols <= 512)
{
matchUnrolled<16, 512, Dist>(query, trains, n, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<int2> >(imgIdx), static_cast< DevMem2D_<float2> > (distance), stream);
}
else if (query.cols <= 1024)
{
matchUnrolled<16, 1024, Dist>(query, trains, n, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<int2> >(imgIdx), static_cast< DevMem2D_<float2> > (distance), stream);
}*/
else
@@ -832,7 +832,7 @@ namespace cv { namespace gpu { namespace device
s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = train.ptr(::min(blockIdx.x * BLOCK_SIZE + threadIdx.y, train.rows - 1))[loadX];
}
else
{
s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = 0;
s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = 0;
}
@@ -857,7 +857,7 @@ namespace cv { namespace gpu { namespace device
}
}

template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
void calcDistanceUnrolled(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask, const DevMem2Df& allDist, cudaStream_t stream)
{
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
@@ -895,7 +895,7 @@ namespace cv { namespace gpu { namespace device
s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = train.ptr(::min(blockIdx.x * BLOCK_SIZE + threadIdx.y, train.rows - 1))[loadX];
}
else
{
s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = 0;
s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = 0;
}
@@ -920,7 +920,7 @@ namespace cv { namespace gpu { namespace device
}
}

template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
void calcDistance(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask, const DevMem2Df& allDist, cudaStream_t stream)
{
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
@@ -938,9 +938,9 @@ namespace cv { namespace gpu { namespace device
///////////////////////////////////////////////////////////////////////////////
// Calc Distance dispatcher

template <typename Dist, typename T, typename Mask>
void calcDistanceDispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
const DevMem2Df& allDist,
int cc, cudaStream_t stream)
{
if (query.cols <= 64)
@@ -956,11 +956,11 @@ namespace cv { namespace gpu { namespace device
calcDistanceUnrolled<16, 256, Dist>(query, train, mask, allDist, stream);
}
else if (query.cols <= 512)
{
calcDistanceUnrolled<16, 512, Dist>(query, train, mask, allDist, stream);
}
else if (query.cols <= 1024)
{
calcDistanceUnrolled<16, 1024, Dist>(query, train, mask, allDist, stream);
}*/
else
@@ -972,7 +972,7 @@ namespace cv { namespace gpu { namespace device
///////////////////////////////////////////////////////////////////////////////
// find knn match kernel

template <int BLOCK_SIZE>
__global__ void findBestMatch(DevMem2Df allDist, int i, PtrStepi trainIdx, PtrStepf distance)
{
const int SMEM_SIZE = BLOCK_SIZE > 64 ? BLOCK_SIZE : 64;
@@ -985,7 +985,7 @@ namespace cv { namespace gpu { namespace device

float dist = numeric_limits<float>::max();
int bestIdx = -1;

for (int i = threadIdx.x; i < allDist.cols; i += BLOCK_SIZE)
{
float reg = allDistRow[i];
@@ -1013,7 +1013,7 @@ namespace cv { namespace gpu { namespace device
}
}

template <int BLOCK_SIZE>
void findKnnMatch(int k, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream)
{
const dim3 block(BLOCK_SIZE, 1, 1);
@@ -1038,8 +1038,8 @@ namespace cv { namespace gpu { namespace device
// knn match Dispatcher

template <typename Dist, typename T, typename Mask>
void matchDispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>& train, int k, const Mask& mask,
const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
int cc, cudaStream_t stream)
{
if (k == 2)
@@ -1051,13 +1051,13 @@ namespace cv { namespace gpu { namespace device
calcDistanceDispatcher<Dist>(query, train, mask, allDist, cc, stream);
findKnnMatchDispatcher(k, trainIdx, distance, allDist, cc, stream);
}
}
}

///////////////////////////////////////////////////////////////////////////////
// knn match caller

template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
|
||||
const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
|
||||
int cc, cudaStream_t stream)
|
||||
{
|
||||
if (mask.data)
|
||||
@ -1073,7 +1073,7 @@ namespace cv { namespace gpu { namespace device
|
||||
template void matchL1_gpu<int >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
|
||||
template void matchL1_gpu<float >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
|
||||
|
||||
template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
|
||||
template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
|
||||
const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
|
||||
int cc, cudaStream_t stream)
|
||||
{
|
||||
@ -1091,7 +1091,7 @@ namespace cv { namespace gpu { namespace device
|
||||
template void matchL2_gpu<float >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
|
||||
|
||||
template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
|
||||
const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
|
||||
const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
|
||||
int cc, cudaStream_t stream)
|
||||
{
|
||||
if (mask.data)
|
||||
@ -1106,8 +1106,8 @@ namespace cv { namespace gpu { namespace device
|
||||
//template void matchHamming_gpu<short >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
|
||||
template void matchHamming_gpu<int >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
|
||||
|
||||
template <typename T> void match2L1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
||||
const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
|
||||
template <typename T> void match2L1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
||||
const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
|
||||
int cc, cudaStream_t stream)
|
||||
{
|
||||
if (masks.data)
|
||||
@ -1123,8 +1123,8 @@ namespace cv { namespace gpu { namespace device
|
||||
template void match2L1_gpu<int >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
|
||||
template void match2L1_gpu<float >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
|
||||
|
||||
template <typename T> void match2L2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
||||
const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
|
||||
template <typename T> void match2L2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
||||
const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
|
||||
int cc, cudaStream_t stream)
|
||||
{
|
||||
if (masks.data)
|
||||
@ -1140,8 +1140,8 @@ namespace cv { namespace gpu { namespace device
|
||||
//template void match2L2_gpu<int >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Di& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
|
||||
template void match2L2_gpu<float >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
|
||||
|
||||
template <typename T> void match2Hamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
||||
const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
|
||||
template <typename T> void match2Hamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
||||
const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
|
||||
int cc, cudaStream_t stream)
|
||||
{
|
||||
if (masks.data)
|
||||
|
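The knn dispatchers above are easy to lose in the diff noise, so here is a stand-alone sketch of the rule they implement (the enum and pickKernel are hypothetical names, not code from this commit): the descriptor width query.cols selects a fully unrolled kernel instantiation, and anything wider falls through to the generic looping kernel. In this revision some of the wider unrolled branches are commented out, so those widths always take the fallback.

// sketch.cu -- illustrative only; compiles stand-alone.
#include <cstdio>

enum Kernel { Unrolled64, Unrolled128, Generic };

// Mirrors the if/else ladder in the dispatchers: the smallest unrolled kernel
// whose MAX_DESC_LEN covers the descriptor width wins; everything else loops.
Kernel pickKernel(int descriptorWidth)
{
    if (descriptorWidth <= 64)  return Unrolled64;
    if (descriptorWidth <= 128) return Unrolled128;
    return Generic; // wider unrolled variants are (partly) disabled above
}

int main()
{
    // SURF descriptors are 64 or 128 floats wide; wider data loops.
    printf("%d %d %d\n", pickKernel(64), pickKernel(128), pickKernel(500));
    return 0;
}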
modules/gpu/src/cuda/bf_match.cu

@@ -45,14 +45,14 @@
#include "opencv2/gpu/device/vec_distance.hpp"
#include "opencv2/gpu/device/datamov_utils.hpp"

namespace cv { namespace gpu { namespace device
{
    namespace bf_match
    {
        ///////////////////////////////////////////////////////////////////////////////
        // Reduction

        template <int BLOCK_SIZE>
        __device__ void findBestMatch(float& bestDistance, int& bestTrainIdx, float* s_distance, int* s_trainIdx)
        {
            s_distance += threadIdx.y * BLOCK_SIZE;
@@ -66,7 +66,7 @@ namespace cv { namespace gpu { namespace device
            reducePredVal<BLOCK_SIZE>(s_distance, bestDistance, s_trainIdx, bestTrainIdx, threadIdx.x, less<volatile float>());
        }

        template <int BLOCK_SIZE>
        __device__ void findBestMatch(float& bestDistance, int& bestTrainIdx, int& bestImgIdx, float* s_distance, int* s_trainIdx, int* s_imgIdx)
        {
            s_distance += threadIdx.y * BLOCK_SIZE;
@@ -85,7 +85,7 @@ namespace cv { namespace gpu { namespace device
        ///////////////////////////////////////////////////////////////////////////////
        // Match Unrolled Cached

        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename T, typename U>
        __device__ void loadQueryToSmem(int queryIdx, const DevMem2D_<T>& query, U* s_query)
        {
            #pragma unroll
@@ -96,9 +96,9 @@ namespace cv { namespace gpu { namespace device
            }
        }

        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
        __device__ void loopUnrolledCached(int queryIdx, const DevMem2D_<T>& query, volatile int imgIdx, const DevMem2D_<T>& train, const Mask& mask,
                                           typename Dist::value_type* s_query, typename Dist::value_type* s_train,
                                           float& bestDistance, int& bestTrainIdx, int& bestImgIdx)
        {
            for (int t = 0, endt = (train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE; t < endt; ++t)
@@ -142,7 +142,7 @@ namespace cv { namespace gpu { namespace device
            }
        }

        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
        __global__ void matchUnrolledCached(const DevMem2D_<T> query, const DevMem2D_<T> train, const Mask mask, int* bestTrainIdx, float* bestDistance)
        {
            extern __shared__ int smem[];
@@ -173,9 +173,9 @@ namespace cv { namespace gpu { namespace device
            }
        }

        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
        void matchUnrolledCached(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
                                 const DevMem2Di& trainIdx, const DevMem2Df& distance,
                                 cudaStream_t stream)
        {
            const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
@@ -190,8 +190,8 @@ namespace cv { namespace gpu { namespace device
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
        __global__ void matchUnrolledCached(const DevMem2D_<T> query, const DevMem2D_<T>* trains, int n, const Mask mask,
                                            int* bestTrainIdx, int* bestImgIdx, float* bestDistance)
        {
            extern __shared__ int smem[];
@@ -232,9 +232,9 @@ namespace cv { namespace gpu { namespace device
            }
        }

        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
        void matchUnrolledCached(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const Mask& mask,
                                 const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
                                 cudaStream_t stream)
        {
            const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
@@ -252,9 +252,9 @@ namespace cv { namespace gpu { namespace device
        ///////////////////////////////////////////////////////////////////////////////
        // Match Unrolled

        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
        __device__ void loopUnrolled(int queryIdx, const DevMem2D_<T>& query, volatile int imgIdx, const DevMem2D_<T>& train, const Mask& mask,
                                     typename Dist::value_type* s_query, typename Dist::value_type* s_train,
                                     float& bestDistance, int& bestTrainIdx, int& bestImgIdx)
        {
            for (int t = 0, endt = (train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE; t < endt; ++t)
@@ -314,7 +314,7 @@ namespace cv { namespace gpu { namespace device

            typename Dist::value_type* s_query = (typename Dist::value_type*)(smem);
            typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * BLOCK_SIZE);

            loopUnrolled<BLOCK_SIZE, MAX_DESC_LEN, Dist>(queryIdx, query, 0, train, mask, s_query, s_train, myBestDistance, myBestTrainIdx, myBestTrainIdx);

            __syncthreads();
@@ -331,9 +331,9 @@ namespace cv { namespace gpu { namespace device
            }
        }

        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
        void matchUnrolled(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
                           const DevMem2Di& trainIdx, const DevMem2Df& distance,
                           cudaStream_t stream)
        {
            const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
@@ -349,7 +349,7 @@ namespace cv { namespace gpu { namespace device
        }

        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
        __global__ void matchUnrolled(const DevMem2D_<T> query, const DevMem2D_<T>* trains, int n, const Mask mask,
                                      int* bestTrainIdx, int* bestImgIdx, float* bestDistance)
        {
            extern __shared__ int smem[];
@@ -364,7 +364,7 @@ namespace cv { namespace gpu { namespace device
            typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * BLOCK_SIZE);

            Mask m = mask;

            for (int imgIdx = 0; imgIdx < n; ++imgIdx)
            {
                const DevMem2D_<T> train = trains[imgIdx];
@@ -388,9 +388,9 @@ namespace cv { namespace gpu { namespace device
            }
        }

        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
        void matchUnrolled(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const Mask& mask,
                           const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
                           cudaStream_t stream)
        {
            const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
@@ -408,9 +408,9 @@ namespace cv { namespace gpu { namespace device
        ///////////////////////////////////////////////////////////////////////////////
        // Match

        template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
        __device__ void loop(int queryIdx, const DevMem2D_<T>& query, volatile int imgIdx, const DevMem2D_<T>& train, const Mask& mask,
                             typename Dist::value_type* s_query, typename Dist::value_type* s_train,
                             float& bestDistance, int& bestTrainIdx, int& bestImgIdx)
        {
            for (int t = 0, endt = (train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE; t < endt; ++t)
@@ -469,7 +469,7 @@ namespace cv { namespace gpu { namespace device

            typename Dist::value_type* s_query = (typename Dist::value_type*)(smem);
            typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * BLOCK_SIZE);

            loop<BLOCK_SIZE, Dist>(queryIdx, query, 0, train, mask, s_query, s_train, myBestDistance, myBestTrainIdx, myBestTrainIdx);

            __syncthreads();
@@ -486,9 +486,9 @@ namespace cv { namespace gpu { namespace device
            }
        }

        template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
        void match(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
                   const DevMem2Di& trainIdx, const DevMem2Df& distance,
                   cudaStream_t stream)
        {
            const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
@@ -504,7 +504,7 @@ namespace cv { namespace gpu { namespace device
        }

        template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
        __global__ void match(const DevMem2D_<T> query, const DevMem2D_<T>* trains, int n, const Mask mask,
                              int* bestTrainIdx, int* bestImgIdx, float* bestDistance)
        {
            extern __shared__ int smem[];
@@ -542,9 +542,9 @@ namespace cv { namespace gpu { namespace device
            }
        }

        template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
        void match(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const Mask& mask,
                   const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
                   cudaStream_t stream)
        {
            const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
@@ -562,9 +562,9 @@ namespace cv { namespace gpu { namespace device
        ///////////////////////////////////////////////////////////////////////////////
        // Match dispatcher

        template <typename Dist, typename T, typename Mask>
        void matchDispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
                             const DevMem2Di& trainIdx, const DevMem2Df& distance,
                             int cc, cudaStream_t stream)
        {
            if (query.cols <= 64)
@@ -580,11 +580,11 @@ namespace cv { namespace gpu { namespace device
                matchUnrolled<16, 256, Dist>(query, train, mask, trainIdx, distance, stream);
            }
            else if (query.cols <= 512)
            {
                matchUnrolled<16, 512, Dist>(query, train, mask, trainIdx, distance, stream);
            }
            else if (query.cols <= 1024)
            {
                matchUnrolled<16, 1024, Dist>(query, train, mask, trainIdx, distance, stream);
            }*/
            else
@@ -593,9 +593,9 @@ namespace cv { namespace gpu { namespace device
            }
        }

        template <typename Dist, typename T, typename Mask>
        void matchDispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const Mask& mask,
                             const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
                             int cc, cudaStream_t stream)
        {
            if (query.cols <= 64)
@@ -611,11 +611,11 @@ namespace cv { namespace gpu { namespace device
                matchUnrolled<16, 256, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance, stream);
            }
            else if (query.cols <= 512)
            {
                matchUnrolled<16, 512, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance, stream);
            }
            else if (query.cols <= 1024)
            {
                matchUnrolled<16, 1024, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance, stream);
            }*/
            else
@@ -627,20 +627,20 @@ namespace cv { namespace gpu { namespace device
        ///////////////////////////////////////////////////////////////////////////////
        // Match caller

        template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
                                               const DevMem2Di& trainIdx, const DevMem2Df& distance,
                                               int cc, cudaStream_t stream)
        {
            if (mask.data)
            {
                matchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), SingleMask(mask),
                                             trainIdx, distance,
                                             cc, stream);
            }
            else
            {
                matchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), WithOutMask(),
                                             trainIdx, distance,
                                             cc, stream);
            }
        }
@@ -652,20 +652,20 @@ namespace cv { namespace gpu { namespace device
        template void matchL1_gpu<int   >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
        template void matchL1_gpu<float >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);

        template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
                                               const DevMem2Di& trainIdx, const DevMem2Df& distance,
                                               int cc, cudaStream_t stream)
        {
            if (mask.data)
            {
                matchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), SingleMask(mask),
                                        trainIdx, distance,
                                        cc, stream);
            }
            else
            {
                matchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), WithOutMask(),
                                        trainIdx, distance,
                                        cc, stream);
            }
        }
@@ -677,20 +677,20 @@ namespace cv { namespace gpu { namespace device
        //template void matchL2_gpu<int   >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
        template void matchL2_gpu<float >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);

        template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
                                                    const DevMem2Di& trainIdx, const DevMem2Df& distance,
                                                    int cc, cudaStream_t stream)
        {
            if (mask.data)
            {
                matchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), SingleMask(mask),
                                             trainIdx, distance,
                                             cc, stream);
            }
            else
            {
                matchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), WithOutMask(),
                                             trainIdx, distance,
                                             cc, stream);
            }
        }
@@ -701,20 +701,20 @@ namespace cv { namespace gpu { namespace device
        //template void matchHamming_gpu<short >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
        template void matchHamming_gpu<int   >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);

        template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
                                               const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
                                               int cc, cudaStream_t stream)
        {
            if (masks.data)
            {
                matchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains.ptr(), trains.cols, MaskCollection(masks.data),
                                             trainIdx, imgIdx, distance,
                                             cc, stream);
            }
            else
            {
                matchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains.ptr(), trains.cols, WithOutMask(),
                                             trainIdx, imgIdx, distance,
                                             cc, stream);
            }
        }
@@ -726,20 +726,20 @@ namespace cv { namespace gpu { namespace device
        template void matchL1_gpu<int   >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
        template void matchL1_gpu<float >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);

        template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
                                               const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
                                               int cc, cudaStream_t stream)
        {
            if (masks.data)
            {
                matchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains.ptr(), trains.cols, MaskCollection(masks.data),
                                        trainIdx, imgIdx, distance,
                                        cc, stream);
            }
            else
            {
                matchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains.ptr(), trains.cols, WithOutMask(),
                                        trainIdx, imgIdx, distance,
                                        cc, stream);
            }
        }
@@ -751,20 +751,20 @@ namespace cv { namespace gpu { namespace device
        //template void matchL2_gpu<int   >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
        template void matchL2_gpu<float >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);

        template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
                                                    const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
                                                    int cc, cudaStream_t stream)
        {
            if (masks.data)
            {
                matchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains.ptr(), trains.cols, MaskCollection(masks.data),
                                             trainIdx, imgIdx, distance,
                                             cc, stream);
            }
            else
            {
                matchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains.ptr(), trains.cols, WithOutMask(),
                                             trainIdx, imgIdx, distance,
                                             cc, stream);
            }
        }
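For readers skimming the hunks above: the heart of bf_match is the per-block argmin that findBestMatch performs over shared memory. The following self-contained program is a simplified restatement of that pattern (my own sketch, not this file's code; the real file reduces a BLOCK_SIZE x BLOCK_SIZE tile with reducePredVal, and blockArgmin is a hypothetical name).

// argmin_sketch.cu -- build with: nvcc argmin_sketch.cu
#include <cstdio>
#include <cuda_runtime.h>

// Each thread scans a strided slice of the candidate distances, then a
// shared-memory tree reduction keeps the smallest distance and its index.
// BLOCK must be a power of two here.
template <int BLOCK>
__global__ void blockArgmin(const float* dist, int n, int* bestIdx, float* bestDist)
{
    __shared__ float s_d[BLOCK];
    __shared__ int   s_i[BLOCK];

    float best = 3.402823466e+38f; // FLT_MAX, like numeric_limits<float>::max() in the kernels
    int   idx  = -1;
    for (int i = threadIdx.x; i < n; i += BLOCK)   // strided scan, as in findBestMatch
        if (dist[i] < best) { best = dist[i]; idx = i; }

    s_d[threadIdx.x] = best;
    s_i[threadIdx.x] = idx;
    __syncthreads();

    for (int s = BLOCK / 2; s > 0; s >>= 1)        // tree reduction over shared memory
    {
        if (threadIdx.x < s && s_d[threadIdx.x + s] < s_d[threadIdx.x])
        {
            s_d[threadIdx.x] = s_d[threadIdx.x + s];
            s_i[threadIdx.x] = s_i[threadIdx.x + s];
        }
        __syncthreads();
    }

    if (threadIdx.x == 0) { *bestIdx = s_i[0]; *bestDist = s_d[0]; }
}

int main()
{
    const int n = 1000;
    float h[n];
    for (int i = 0; i < n; ++i) h[i] = (float)((i * 37) % 101);

    float *d_dist, *d_best; int* d_idx;
    cudaMalloc(&d_dist, n * sizeof(float));
    cudaMalloc(&d_best, sizeof(float));
    cudaMalloc(&d_idx,  sizeof(int));
    cudaMemcpy(d_dist, h, n * sizeof(float), cudaMemcpyHostToDevice);

    blockArgmin<256><<<1, 256>>>(d_dist, n, d_idx, d_best);

    int idx; float best;
    cudaMemcpy(&idx,  d_idx,  sizeof(int),   cudaMemcpyDeviceToHost);
    cudaMemcpy(&best, d_best, sizeof(float), cudaMemcpyDeviceToHost);
    printf("best %f at %d\n", best, idx);

    cudaFree(d_dist); cudaFree(d_best); cudaFree(d_idx);
    return 0;
}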
modules/gpu/src/cuda/bf_radius_match.cu

@@ -45,9 +45,9 @@
#include "opencv2/gpu/device/vec_distance.hpp"
#include "opencv2/gpu/device/datamov_utils.hpp"

namespace cv { namespace gpu { namespace device
{
    namespace bf_radius_match
    {
        ///////////////////////////////////////////////////////////////////////////////
        // Match Unrolled
@@ -112,8 +112,8 @@ namespace cv { namespace gpu { namespace device
        #endif
        }

        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
        void matchUnrolled(const DevMem2D_<T>& query, const DevMem2D_<T>& train, float maxDistance, const Mask& mask,
                           const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, cudaStream_t stream)
        {
            const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
@@ -121,17 +121,17 @@ namespace cv { namespace gpu { namespace device

            const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);

            matchUnrolled<BLOCK_SIZE, MAX_DESC_LEN, false, Dist><<<grid, block, smemSize, stream>>>(query, 0, train, maxDistance, mask,
                trainIdx, PtrStepi(), distance, nMatches.data, trainIdx.cols);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T>
        void matchUnrolled(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, float maxDistance, const DevMem2Db* masks,
                           const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
                           cudaStream_t stream)
        {
            const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
@@ -146,12 +146,12 @@ namespace cv { namespace gpu { namespace device

            if (masks != 0 && masks[i].data)
            {
                matchUnrolled<BLOCK_SIZE, MAX_DESC_LEN, true, Dist><<<grid, block, smemSize, stream>>>(query, i, train, maxDistance, SingleMask(masks[i]),
                    trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols);
            }
            else
            {
                matchUnrolled<BLOCK_SIZE, MAX_DESC_LEN, true, Dist><<<grid, block, smemSize, stream>>>(query, i, train, maxDistance, WithOutMask(),
                    trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols);
            }
            cudaSafeCall( cudaGetLastError() );
@@ -223,9 +223,9 @@ namespace cv { namespace gpu { namespace device
        #endif
        }

        template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
        void match(const DevMem2D_<T>& query, const DevMem2D_<T>& train, float maxDistance, const Mask& mask,
                   const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
                   cudaStream_t stream)
        {
            const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
@@ -233,7 +233,7 @@ namespace cv { namespace gpu { namespace device

            const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);

            match<BLOCK_SIZE, false, Dist><<<grid, block, smemSize, stream>>>(query, 0, train, maxDistance, mask,
                trainIdx, PtrStepi(), distance, nMatches.data, trainIdx.cols);
            cudaSafeCall( cudaGetLastError() );

@@ -241,9 +241,9 @@ namespace cv { namespace gpu { namespace device
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        template <int BLOCK_SIZE, typename Dist, typename T>
        void match(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, float maxDistance, const DevMem2Db* masks,
                   const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
                   cudaStream_t stream)
        {
            const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
@@ -258,12 +258,12 @@ namespace cv { namespace gpu { namespace device

            if (masks != 0 && masks[i].data)
            {
                match<BLOCK_SIZE, true, Dist><<<grid, block, smemSize, stream>>>(query, i, train, maxDistance, SingleMask(masks[i]),
                    trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols);
            }
            else
            {
                match<BLOCK_SIZE, true, Dist><<<grid, block, smemSize, stream>>>(query, i, train, maxDistance, WithOutMask(),
                    trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols);
            }
            cudaSafeCall( cudaGetLastError() );
@@ -276,9 +276,9 @@ namespace cv { namespace gpu { namespace device
        ///////////////////////////////////////////////////////////////////////////////
        // Match dispatcher

        template <typename Dist, typename T, typename Mask>
        void matchDispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>& train, float maxDistance, const Mask& mask,
                             const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
                             int cc, cudaStream_t stream)
        {
            if (query.cols <= 64)
@@ -294,11 +294,11 @@ namespace cv { namespace gpu { namespace device
                matchUnrolled<16, 256, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream);
            }
            else if (query.cols <= 512)
            {
                matchUnrolled<16, 512, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream);
            }
            else if (query.cols <= 1024)
            {
                matchUnrolled<16, 1024, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream);
            }*/
            else
@@ -307,9 +307,9 @@ namespace cv { namespace gpu { namespace device
            }
        }

        template <typename Dist, typename T>
        void matchDispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, float maxDistance, const DevMem2Db* masks,
                             const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
                             int cc, cudaStream_t stream)
        {
            if (query.cols <= 64)
@@ -325,36 +325,36 @@ namespace cv { namespace gpu { namespace device
                matchUnrolled<16, 256, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream);
            }
            else if (query.cols <= 512)
            {
                matchUnrolled<16, 512, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream);
            }
            else if (query.cols <= 1024)
            {
                matchUnrolled<16, 1024, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream);
            }*/
            else
            {
                match<16, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream);
            }
        }

        ///////////////////////////////////////////////////////////////////////////////
        // Radius Match caller

        template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
                                               const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
                                               int cc, cudaStream_t stream)
        {
            if (mask.data)
            {
                matchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), maxDistance, SingleMask(mask),
                                             trainIdx, distance, nMatches,
                                             cc, stream);
            }
            else
            {
                matchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), maxDistance, WithOutMask(),
                                             trainIdx, distance, nMatches,
                                             cc, stream);
            }
        }
@@ -366,20 +366,20 @@ namespace cv { namespace gpu { namespace device
        template void matchL1_gpu<int   >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchL1_gpu<float >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);

        template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
                                               const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
                                               int cc, cudaStream_t stream)
        {
            if (mask.data)
            {
                matchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), maxDistance, SingleMask(mask),
                                        trainIdx, distance, nMatches,
                                        cc, stream);
            }
            else
            {
                matchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), maxDistance, WithOutMask(),
                                        trainIdx, distance, nMatches,
                                        cc, stream);
            }
        }
@@ -391,20 +391,20 @@ namespace cv { namespace gpu { namespace device
        //template void matchL2_gpu<int   >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchL2_gpu<float >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);

        template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
                                                    const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
                                                    int cc, cudaStream_t stream)
        {
            if (mask.data)
            {
                matchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), maxDistance, SingleMask(mask),
                                             trainIdx, distance, nMatches,
                                             cc, stream);
            }
            else
            {
                matchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), maxDistance, WithOutMask(),
                                             trainIdx, distance, nMatches,
                                             cc, stream);
            }
        }
@@ -415,12 +415,12 @@ namespace cv { namespace gpu { namespace device
        //template void matchHamming_gpu<short >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchHamming_gpu<int   >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);

        template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
                                               const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
                                               int cc, cudaStream_t stream)
        {
            matchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains, n, maxDistance, masks,
                                         trainIdx, imgIdx, distance, nMatches,
                                         cc, stream);
        }

@@ -431,12 +431,12 @@ namespace cv { namespace gpu { namespace device
        template void matchL1_gpu<int   >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchL1_gpu<float >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);

        template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
                                               const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
                                               int cc, cudaStream_t stream)
        {
            matchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains, n, maxDistance, masks,
                                    trainIdx, imgIdx, distance, nMatches,
                                    cc, stream);
        }

@@ -447,12 +447,12 @@ namespace cv { namespace gpu { namespace device
        //template void matchL2_gpu<int   >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchL2_gpu<float >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);

        template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
                                                    const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
                                                    int cc, cudaStream_t stream)
        {
            matchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains, n, maxDistance, masks,
                                         trainIdx, imgIdx, distance, nMatches,
                                         cc, stream);
        }
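Radius match differs from the callers above in that each query can produce a variable number of hits, reported through the nMatches array. Below is a minimal sketch of that output scheme, assuming a precomputed distance matrix (the real kernels compute distances on the fly and clamp writes to trainIdx.cols; radiusAppend is a hypothetical name). Launch with one block per query row, e.g. radiusAppend<<<numQueries, 256>>>(...).

// radius_sketch.cu -- compiles with: nvcc -c radius_sketch.cu
#include <cuda_runtime.h>

// Every (query, train) pair within maxDistance appends a hit; nMatches[query]
// is bumped atomically so the host later knows how many slots were filled.
__global__ void radiusAppend(const float* dist, int nTrain, float maxDistance,
                             int* trainIdx, float* outDist, unsigned int* nMatches,
                             int maxCount)
{
    const int queryIdx = blockIdx.x;
    for (int t = threadIdx.x; t < nTrain; t += blockDim.x)
    {
        const float d = dist[queryIdx * nTrain + t];
        if (d < maxDistance)
        {
            // atomicInc wraps at the second argument; (unsigned int)-1 means "never wrap".
            const unsigned int ind = atomicInc(nMatches + queryIdx, (unsigned int)-1);
            if (ind < (unsigned int)maxCount) // drop hits beyond the result buffer
            {
                trainIdx[queryIdx * maxCount + ind] = t;
                outDist [queryIdx * maxCount + ind] = d;
            }
        }
    }
}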
modules/gpu/src/cuda/bilateral_filter.cu

@@ -43,9 +43,9 @@
#include "internal_shared.hpp"
#include "opencv2/gpu/device/limits.hpp"

namespace cv { namespace gpu { namespace device
{
    namespace bilateral_filter
    {
        __constant__ float* ctable_color;
        __constant__ float* ctable_space;
@@ -108,7 +108,7 @@ namespace cv { namespace gpu { namespace device
            dp[3] = *(disp + (y+1) * disp_step + x + 0);
            dp[4] = *(disp + (y  ) * disp_step + x + 1);

            if (::abs(dp[1] - dp[0]) >= cedge_disc || ::abs(dp[2] - dp[0]) >= cedge_disc || ::abs(dp[3] - dp[0]) >= cedge_disc || ::abs(dp[4] - dp[0]) >= cedge_disc)
            {
                const int ymin = ::max(0, y - cradius);
                const int xmin = ::max(0, x - cradius);
@@ -175,7 +175,7 @@ namespace cv { namespace gpu { namespace device
            }
        }

        template <typename T>
        void bilateral_filter_caller(DevMem2D_<T> disp, DevMem2Db img, int channels, int iters, cudaStream_t stream)
        {
            dim3 threads(32, 8, 1);
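The refinement guard in the hunk at line 108 is worth restating: a disparity pixel is touched only when it differs from one of its four neighbours by at least cedge_disc, i.e. only near disparity discontinuities. A host-side restatement (a sketch with a hypothetical name, not the kernel itself):

#include <cstdlib>

// dp[0] is the centre disparity; dp[1..4] are its four neighbours, matching
// the loads shown in the kernel above.
bool needsRefinement(const int dp[5], int edge_disc)
{
    return std::abs(dp[1] - dp[0]) >= edge_disc ||
           std::abs(dp[2] - dp[0]) >= edge_disc ||
           std::abs(dp[3] - dp[0]) >= edge_disc ||
           std::abs(dp[4] - dp[0]) >= edge_disc;
}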
modules/gpu/src/cuda/blend.cu

@@ -42,9 +42,9 @@

#include "internal_shared.hpp"

namespace cv { namespace gpu { namespace device
{
    namespace blend
    {
        template <typename T>
        __global__ void blendLinearKernel(int rows, int cols, int cn, const PtrStep<T> img1, const PtrStep<T> img2,
@@ -62,14 +62,14 @@ namespace cv { namespace gpu { namespace device
                T p2 = img2.ptr(y)[x];
                result.ptr(y)[x] = (p1 * w1 + p2 * w2) / (w1 + w2 + 1e-5f);
            }
        }

        template <typename T>
        void blendLinearCaller(int rows, int cols, int cn, PtrStep<T> img1, PtrStep<T> img2, PtrStepf weights1, PtrStepf weights2, PtrStep<T> result, cudaStream_t stream)
        {
            dim3 threads(16, 16);
            dim3 grid(divUp(cols * cn, threads.x), divUp(rows, threads.y));

            blendLinearKernel<<<grid, threads, 0, stream>>>(rows, cols * cn, cn, img1, img2, weights1, weights2, result);
            cudaSafeCall( cudaGetLastError() );

@@ -105,12 +105,12 @@ namespace cv { namespace gpu { namespace device
        {
            dim3 threads(16, 16);
            dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));

            blendLinearKernel8UC4<<<grid, threads, 0, stream>>>(rows, cols, img1, img2, weights1, weights2, result);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall(cudaDeviceSynchronize());
        }
    } // namespace blend
}}} // namespace cv { namespace gpu { namespace device
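The per-pixel rule in blendLinearKernel is a weighted average with a small epsilon so pixels where both weights are zero stay finite instead of dividing by zero. As a scalar sketch of exactly the expression shown above:

// result = (p1*w1 + p2*w2) / (w1 + w2 + eps), as in the kernel.
float blendLinear(float p1, float p2, float w1, float w2)
{
    return (p1 * w1 + p2 * w2) / (w1 + w2 + 1e-5f);
}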
modules/gpu/src/cuda/calib3d.cu

@@ -44,7 +44,7 @@
#include "opencv2/gpu/device/transform.hpp"
#include "opencv2/gpu/device/functional.hpp"

namespace cv { namespace gpu { namespace device
{
    #define SOLVE_PNP_RANSAC_MAX_NUM_ITERS 200

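SOLVE_PNP_RANSAC_MAX_NUM_ITERS caps the number of pose hypotheses at 200. For context only (a textbook aside, not code from this commit), this is how such a cap usually interacts with the adaptive RANSAC stopping rule N = log(1 - p) / log(1 - w^s):

#include <cmath>

// confidence p, estimated inlier ratio w, minimal sample size s; the result is
// clamped by the hard iteration cap, as the #define above does in spirit.
int ransacIters(double confidence, double inlierRatio, int sampleSize, int maxIters)
{
    const double denom = std::log(1.0 - std::pow(inlierRatio, sampleSize));
    if (denom >= 0.0) return maxIters;   // degenerate: no usable inlier estimate
    int n = (int)std::ceil(std::log(1.0 - confidence) / denom);
    if (n < 1) n = 1;
    return n < maxIters ? n : maxIters;  // clamp to the hard cap
}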
@ -44,9 +44,9 @@
#include <algorithm>
#include "internal_shared.hpp"

namespace cv { namespace gpu { namespace device
{
namespace canny
{
__global__ void calcSobelRowPass(const PtrStepb src, PtrStepi dx_buf, PtrStepi dy_buf, int rows, int cols)
{
@ -99,7 +99,7 @@ namespace cv { namespace gpu { namespace device
}
};

template <typename Norm> __global__ void calcMagnitude(const PtrStepi dx_buf, const PtrStepi dy_buf,
PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols)
{
__shared__ int sdx[18][16];
@ -175,7 +175,7 @@ namespace cv { namespace gpu { namespace device
}

//////////////////////////////////////////////////////////////////////////////////////////

#define CANNY_SHIFT 15
#define TG22 (int)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5)

@ -236,7 +236,7 @@ namespace cv { namespace gpu { namespace device
edge_type = 1 + (int)(m > high_thresh);
}
}

map.ptr(i + 1)[j + 1] = edge_type;
}
}
@ -270,7 +270,7 @@ namespace cv { namespace gpu { namespace device

const int tid = threadIdx.y * 16 + threadIdx.x;
const int lx = tid % 18;
const int ly = tid / 18;

if (ly < 14)
smem[ly][lx] = map.ptr(blockIdx.y * 16 + ly)[blockIdx.x * 16 + lx];
@ -294,10 +294,10 @@ namespace cv { namespace gpu { namespace device
n += smem[threadIdx.y ][threadIdx.x ] == 2;
n += smem[threadIdx.y ][threadIdx.x + 1] == 2;
n += smem[threadIdx.y ][threadIdx.x + 2] == 2;

n += smem[threadIdx.y + 1][threadIdx.x ] == 2;
n += smem[threadIdx.y + 1][threadIdx.x + 2] == 2;

n += smem[threadIdx.y + 2][threadIdx.x ] == 2;
n += smem[threadIdx.y + 2][threadIdx.x + 1] == 2;
n += smem[threadIdx.y + 2][threadIdx.x + 2] == 2;
@ -318,10 +318,10 @@ namespace cv { namespace gpu { namespace device
n += smem[threadIdx.y ][threadIdx.x ] == 1;
n += smem[threadIdx.y ][threadIdx.x + 1] == 1;
n += smem[threadIdx.y ][threadIdx.x + 2] == 1;

n += smem[threadIdx.y + 1][threadIdx.x ] == 1;
n += smem[threadIdx.y + 1][threadIdx.x + 2] == 1;

n += smem[threadIdx.y + 2][threadIdx.x ] == 1;
n += smem[threadIdx.y + 2][threadIdx.x + 1] == 1;
n += smem[threadIdx.y + 2][threadIdx.x + 2] == 1;
@ -361,7 +361,7 @@ namespace cv { namespace gpu { namespace device
#if __CUDA_ARCH__ >= 120

const int stack_size = 512;

__shared__ unsigned int s_counter;
__shared__ unsigned int s_ind;
__shared__ ushort2 s_st[stack_size];
@ -404,11 +404,11 @@ namespace cv { namespace gpu { namespace device
if (subTaskIdx < portion)
pos = s_st[s_counter - 1 - subTaskIdx];
__syncthreads();

if (threadIdx.x == 0)
s_counter -= portion;
__syncthreads();

if (pos.x > 0 && pos.x <= cols && pos.y > 0 && pos.y <= rows)
{
pos.x += c_dx[threadIdx.x & 7];
@ -452,7 +452,7 @@ namespace cv { namespace gpu { namespace device
{
void* counter_ptr;
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, counter) );

unsigned int count;
cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
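The CANNY_SHIFT/TG22 pair above implements the tan(22.5 degrees) sector test in fixed point: tan(22.5 degrees) is about 0.4142, scaled by 2^15 so the gradient-direction comparison runs in integer arithmetic. A sketch of how such a constant is typically used, assuming the standard formulation (not a verbatim excerpt from canny.cu):

// Sketch: fixed-point test for |dy|/|dx| < tan(22.5 deg).
#define CANNY_SHIFT 15
#define TG22 (int)(0.4142135623730950488016887242097 * (1 << CANNY_SHIFT) + 0.5)

inline bool belowTg22(int dx, int dy)
{
    long long x = dx < 0 ? -dx : dx;
    long long y = (long long)(dy < 0 ? -dy : dy) << CANNY_SHIFT;
    // |dy| << CANNY_SHIFT < |dx| * TG22  <=>  |dy|/|dx| < tan(22.5 deg)
    return y < x * TG22;
}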
@ -45,7 +45,7 @@
#include <opencv2/gpu/device/color.hpp>
#include <cvt_colot_internal.h>

namespace cv { namespace gpu { namespace device
{
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_rgba_traits<uchar>::functor_type)
{
@ -153,7 +153,7 @@ namespace cv { namespace gpu { namespace device
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};

OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_xyz4_traits<uchar>::functor_type)
{
@ -48,9 +48,9 @@
#include "opencv2/gpu/device/border_interpolate.hpp"
#include "opencv2/gpu/device/static_check.hpp"

namespace cv { namespace gpu { namespace device
{
namespace column_filter
{
#define MAX_KERNEL_SIZE 32

@ -146,7 +146,7 @@ namespace cv { namespace gpu { namespace device

const dim3 block(BLOCK_DIM_X, BLOCK_DIM_Y);
const dim3 grid(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y * PATCH_PER_BLOCK));

B<T> brd(src.rows);

linearColumnFilter<KSIZE, T, D><<<grid, block, 0, stream>>>(src, dst, anchor, brd);
@ -162,7 +162,7 @@ namespace cv { namespace gpu { namespace device
{
typedef void (*caller_t)(DevMem2D_<T> src, DevMem2D_<D> dst, int anchor, int cc, cudaStream_t stream);

static const caller_t callers[5][33] =
{
{
0,
@ -338,9 +338,9 @@ namespace cv { namespace gpu { namespace device
linearColumnFilter_caller<30, T, D, BrdColWrap>,
linearColumnFilter_caller<31, T, D, BrdColWrap>,
linearColumnFilter_caller<32, T, D, BrdColWrap>
}
};

loadKernel(kernel, ksize);

callers[brd_type][ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor, cc, stream);
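The divUp helper used in all of these launch configurations is integer ceiling division, so partially filled blocks at the image border still get launched. divUp itself is defined in the OpenCV gpu headers; the standard formulation is shown here only for reference:

// Sketch: ceiling division for CUDA grid sizing.
inline int divUp(int total, int grain)
{
    return (total + grain - 1) / grain;  // e.g. divUp(1000, 256) == 4
}

// Typical use, as in the callers above:
//   dim3 block(16, 16);
//   dim3 grid(divUp(cols, block.x), divUp(rows, block.y));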
@ -43,9 +43,9 @@
#include "internal_shared.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"

namespace cv { namespace gpu { namespace device
{
namespace imgproc
{
template <typename Ptr2D, typename T> __global__ void copyMakeBorder(const Ptr2D src, DevMem2D_<T> dst, int top, int left)
{
@ -58,9 +58,9 @@ namespace cv { namespace gpu { namespace device

template <template <typename> class B, typename T> struct CopyMakeBorderDispatcher
{
static void call(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, int top, int left,
const typename VecTraits<T>::elem_type* borderValue, cudaStream_t stream)
{
dim3 block(32, 8);
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

@ -75,20 +75,20 @@ namespace cv { namespace gpu { namespace device
}
};

template <typename T, int cn> void copyMakeBorder_gpu(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode,
const T* borderValue, cudaStream_t stream)
{
typedef typename TypeVec<T, cn>::vec_type vec_type;

typedef void (*caller_t)(const DevMem2D_<vec_type>& src, const DevMem2D_<vec_type>& dst, int top, int left, const T* borderValue, cudaStream_t stream);

static const caller_t callers[5] =
{
CopyMakeBorderDispatcher<BrdReflect101, vec_type>::call,
CopyMakeBorderDispatcher<BrdReplicate, vec_type>::call,
CopyMakeBorderDispatcher<BrdConstant, vec_type>::call,
CopyMakeBorderDispatcher<BrdReflect, vec_type>::call,
CopyMakeBorderDispatcher<BrdWrap, vec_type>::call
};

callers[borderMode](DevMem2D_<vec_type>(src), DevMem2D_<vec_type>(dst), top, left, borderValue, stream);
@ -40,7 +40,7 @@
//
// Copyright (c) 2010, Paul Furgale, Chi Hay Tong
//
// The original code was written by Paul Furgale and Chi Hay Tong
// and later optimized and prepared for integration into OpenCV by Itseez.
//
//M*/
@ -48,9 +48,9 @@
#include "opencv2/gpu/device/common.hpp"
#include "opencv2/gpu/device/utility.hpp"

namespace cv { namespace gpu { namespace device
{
namespace fast
{
__device__ unsigned int g_counter = 0;

@ -78,14 +78,14 @@ namespace cv { namespace gpu { namespace device

d1 = diffType(v, C[0] & 0xff, th);
d2 = diffType(v, C[2] & 0xff, th);

if ((d1 | d2) == 0)
return;

mask1 |= (d1 & 1) << 0;
mask2 |= ((d1 & 2) >> 1) << 0;

mask1 |= (d2 & 1) << 8;
mask2 |= ((d2 & 2) >> 1) << 8;
@ -141,7 +141,7 @@ namespace cv { namespace gpu { namespace device
return;*/

mask1 |= (d1 & 1) << 1;
mask2 |= ((d1 & 2) >> 1) << 1;

mask1 |= (d2 & 1) << 9;
mask2 |= ((d2 & 2) >> 1) << 9;
@ -169,7 +169,7 @@ namespace cv { namespace gpu { namespace device
return;*/

mask1 |= (d1 & 1) << 5;
mask2 |= ((d1 & 2) >> 1) << 5;

mask1 |= (d2 & 1) << 13;
mask2 |= ((d2 & 2) >> 1) << 13;
@ -191,7 +191,7 @@ namespace cv { namespace gpu { namespace device
// 0 -> not a keypoint
__device__ __forceinline__ bool isKeyPoint(int mask1, int mask2)
{
return (__popc(mask1) > 8 && (c_table[(mask1 >> 3) - 63] & (1 << (mask1 & 7)))) ||
(__popc(mask2) > 8 && (c_table[(mask2 >> 3) - 63] & (1 << (mask2 & 7))));
}

@ -212,14 +212,14 @@ namespace cv { namespace gpu { namespace device
calcMask(C, v, mid, mask1, mask2);

int isKp = static_cast<int>(isKeyPoint(mask1, mask2));

min = isKp * (mid + 1) + (isKp ^ 1) * min;
max = (isKp ^ 1) * (mid - 1) + isKp * max;
}

return min - 1;
}

template <bool calcScore, class Mask>
__global__ void calcKeypoints(const DevMem2Db img, const Mask mask, short2* kpLoc, const unsigned int maxKeypoints, PtrStepi score, const int threshold)
{
@ -243,7 +243,7 @@ namespace cv { namespace gpu { namespace device
C[2] |= static_cast<uint>(img(i - 1, j - 3)) << (3 * 8);
C[1] |= static_cast<uint>(img(i - 1, j + 3)) << 8;

C[3] |= static_cast<uint>(img(i, j - 3));
v = static_cast<int>(img(i, j));
C[1] |= static_cast<uint>(img(i, j + 3));

@ -313,7 +313,7 @@ namespace cv { namespace gpu { namespace device
cudaSafeCall( cudaGetLastError() );

cudaSafeCall( cudaDeviceSynchronize() );

unsigned int count;
cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) );

@ -335,14 +335,14 @@ namespace cv { namespace gpu { namespace device

int score = scoreMat(loc.y, loc.x);

bool ismax =
score > scoreMat(loc.y - 1, loc.x - 1) &&
score > scoreMat(loc.y - 1, loc.x ) &&
score > scoreMat(loc.y - 1, loc.x + 1) &&

score > scoreMat(loc.y , loc.x - 1) &&
score > scoreMat(loc.y , loc.x + 1) &&

score > scoreMat(loc.y + 1, loc.x - 1) &&
score > scoreMat(loc.y + 1, loc.x ) &&
score > scoreMat(loc.y + 1, loc.x + 1);
@ -375,7 +375,7 @@ namespace cv { namespace gpu { namespace device
cudaSafeCall( cudaGetLastError() );

cudaSafeCall( cudaDeviceSynchronize() );

unsigned int new_count;
cudaSafeCall( cudaMemcpy(&new_count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
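The min/max update in the score search above is a branchless binary search: isKp is 0 or 1, so each assignment selects one of two values without a divergent branch. A host-side sketch of the same pattern, with a generic predicate standing in for the device calcMask/isKeyPoint pair:

// Sketch: branchless bisection for the highest threshold at which a pixel
// still passes the corner test; 'passes' is a stand-in predicate.
#include <functional>

int cornerScoreRef(int lo, int hi, const std::function<bool(int)>& passes)
{
    int mn = lo, mx = hi;
    while (mn <= mx)
    {
        int mid = (mn + mx) / 2;
        int isKp = passes(mid) ? 1 : 0;            // 0 or 1, as in the kernel
        mn = isKp * (mid + 1) + (isKp ^ 1) * mn;   // still a keypoint: search higher
        mx = (isKp ^ 1) * (mid - 1) + isKp * mx;   // not a keypoint: search lower
    }
    return mn - 1;                                 // last threshold that passed
}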
@ -40,7 +40,7 @@
//
// Copyright (c) 2010, Paul Furgale, Chi Hay Tong
//
// The original code was written by Paul Furgale and Chi Hay Tong
// and later optimized and prepared for integration into OpenCV by Itseez.
//
//M*/
@ -50,9 +50,9 @@
#include "opencv2/gpu/device/common.hpp"
#include "opencv2/gpu/device/utility.hpp"

namespace cv { namespace gpu { namespace device
{
namespace gfft
{
texture<float, cudaTextureType2D, cudaReadModeElementType> eigTex(0, cudaFilterModePoint, cudaAddressModeClamp);

@ -117,7 +117,7 @@ namespace cv { namespace gpu { namespace device
cudaSafeCall( cudaGetLastError() );

cudaSafeCall( cudaDeviceSynchronize() );

uint count;
cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(uint), cudaMemcpyDeviceToHost) );

@ -126,9 +126,9 @@ namespace cv { namespace gpu { namespace device

class EigGreater
{
public:
__device__ __forceinline__ bool operator()(float2 a, float2 b) const
{
return tex2D(eigTex, a.x, a.y) > tex2D(eigTex, b.x, b.y);
}
};
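EigGreater is a device-side comparison functor: it orders candidate corners by their eigenvalue response, read back through the eigTex texture, so stronger corners sort first. A host-side analogue of the same "compare by looked-up response" idea, with plain memory in place of the texture (names illustrative, not from the file):

// Sketch: sort points by an external response map, mirroring EigGreater.
#include <algorithm>
#include <vector>

struct Pt { float x, y; };

void sortByResponse(std::vector<Pt>& pts, const std::vector<float>& resp, int cols)
{
    std::sort(pts.begin(), pts.end(), [&](const Pt& a, const Pt& b)
    {
        // higher response first
        return resp[(int)a.y * cols + (int)a.x] > resp[(int)b.y * cols + (int)b.x];
    });
}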
@ -45,7 +45,7 @@
#include "opencv2/gpu/device/utility.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"

namespace cv { namespace gpu { namespace device
{
#define UINT_BITS 32U

@ -65,7 +65,7 @@ namespace cv { namespace gpu { namespace device

#define USE_SMEM_ATOMICS (__CUDA_ARCH__ >= 120)

namespace hist
{
#if (!USE_SMEM_ATOMICS)

@ -173,7 +173,7 @@ namespace cv { namespace gpu { namespace device
{
histogram256<<<PARTIAL_HISTOGRAM256_COUNT, HISTOGRAM256_THREADBLOCK_SIZE, 0, stream>>>(
DevMem2D_<uint>(src),
buf,
static_cast<uint>(src.rows * src.step / sizeof(uint)),
src.cols);
@ -42,7 +42,7 @@

#include "internal_shared.hpp"

namespace cv { namespace gpu { namespace device
{
// Other values are not supported
#define CELL_WIDTH 8
@ -50,7 +50,7 @@ namespace cv { namespace gpu { namespace device
#define CELLS_PER_BLOCK_X 2
#define CELLS_PER_BLOCK_Y 2

namespace hog
{
__constant__ int cnbins;
__constant__ int cblock_stride_x;
@ -63,7 +63,7 @@ namespace cv { namespace gpu { namespace device
__constant__ int cdescr_width;

/* Returns the nearest upper power of two, works only for
the typical GPU thread count (per block) values */
int power_2up(unsigned int n)
{
@ -82,19 +82,19 @@ namespace cv { namespace gpu { namespace device
}

void set_up_constants(int nbins, int block_stride_x, int block_stride_y,
int nblocks_win_x, int nblocks_win_y)
{
cudaSafeCall( cudaMemcpyToSymbol(cnbins, &nbins, sizeof(nbins)) );
cudaSafeCall( cudaMemcpyToSymbol(cblock_stride_x, &block_stride_x, sizeof(block_stride_x)) );
cudaSafeCall( cudaMemcpyToSymbol(cblock_stride_y, &block_stride_y, sizeof(block_stride_y)) );
cudaSafeCall( cudaMemcpyToSymbol(cnblocks_win_x, &nblocks_win_x, sizeof(nblocks_win_x)) );
cudaSafeCall( cudaMemcpyToSymbol(cnblocks_win_y, &nblocks_win_y, sizeof(nblocks_win_y)) );

int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y;
cudaSafeCall( cudaMemcpyToSymbol(cblock_hist_size, &block_hist_size, sizeof(block_hist_size)) );

int block_hist_size_2up = power_2up(block_hist_size);
cudaSafeCall( cudaMemcpyToSymbol(cblock_hist_size_2up, &block_hist_size_2up, sizeof(block_hist_size_2up)) );

int descr_width = nblocks_win_x * block_hist_size;
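power_2up rounds the block histogram size up to the next power of two so the shared-memory reductions further down can assume a power-of-two extent. The file's own implementation may differ; shown here under that assumption is the standard bit-twiddling form for 32-bit values:

// Sketch: round n (n >= 1) up to the next power of two.
unsigned int power2UpRef(unsigned int n)
{
    n--;               // so exact powers of two map to themselves
    n |= n >> 1;
    n |= n >> 2;
    n |= n >> 4;
    n |= n >> 8;
    n |= n >> 16;      // all bits below the top set bit are now 1
    return n + 1;      // e.g. power2UpRef(36) == 64
}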
@ -110,7 +110,7 @@ namespace cv { namespace gpu { namespace device

template <int nblocks> // Number of histogram blocks processed by single GPU thread block
__global__ void compute_hists_kernel_many_blocks(const int img_block_width, const PtrElemStepf grad,
const PtrElemStep qangle, float scale, float* block_hists)
{
const int block_x = threadIdx.z;
@ -125,7 +125,7 @@ namespace cv { namespace gpu { namespace device
float* hists = smem;
float* final_hist = smem + cnbins * 48 * nblocks;

const int offset_x = (blockIdx.x * blockDim.z + block_x) * cblock_stride_x +
4 * cell_x + cell_thread_x;
const int offset_y = blockIdx.y * cblock_stride_y + 4 * cell_y;

@ -135,8 +135,8 @@ namespace cv { namespace gpu { namespace device
// 12 means that 12 pixels affect a block's cell (in one row)
if (cell_thread_x < 12)
{
float* hist = hists + 12 * (cell_y * blockDim.z * CELLS_PER_BLOCK_Y +
cell_x + block_x * CELLS_PER_BLOCK_X) +
cell_thread_x;
for (int bin_id = 0; bin_id < cnbins; ++bin_id)
hist[bin_id * 48 * nblocks] = 0.f;
@ -155,9 +155,9 @@ namespace cv { namespace gpu { namespace device
int dist_center_y = dist_y - 4 * (1 - 2 * cell_y);
int dist_center_x = dist_x - 4 * (1 - 2 * cell_x);

float gaussian = ::expf(-(dist_center_y * dist_center_y +
dist_center_x * dist_center_x) * scale);
float interp_weight = (8.f - ::fabs(dist_y + 0.5f)) *
(8.f - ::fabs(dist_x + 0.5f)) / 64.f;

hist[bin.x * 48 * nblocks] += gaussian * interp_weight * vote.x;
@ -169,41 +169,41 @@ namespace cv { namespace gpu { namespace device
{
if (cell_thread_x < 6) hist_[0] += hist_[6];
if (cell_thread_x < 3) hist_[0] += hist_[3];
if (cell_thread_x == 0)
final_hist[((cell_x + block_x * 2) * 2 + cell_y) * cnbins + bin_id]
= hist_[0] + hist_[1] + hist_[2];
}
}

__syncthreads();

float* block_hist = block_hists + (blockIdx.y * img_block_width +
blockIdx.x * blockDim.z + block_x) *
cblock_hist_size;

int tid = (cell_y * CELLS_PER_BLOCK_Y + cell_x) * 16 + cell_thread_x;
if (tid < cblock_hist_size)
block_hist[tid] = final_hist[block_x * cblock_hist_size + tid];
}

void compute_hists(int nbins, int block_stride_x, int block_stride_y,
int height, int width, const DevMem2Df& grad,
const DevMem2Db& qangle, float sigma, float* block_hists)
{
const int nblocks = 1;

int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) /
block_stride_x;
int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) /
block_stride_y;

dim3 grid(divUp(img_block_width, nblocks), img_block_height);
dim3 threads(32, 2, nblocks);

cudaSafeCall(cudaFuncSetCacheConfig(compute_hists_kernel_many_blocks<nblocks>,
cudaFuncCachePreferL1));

// Precompute gaussian spatial window parameter
float scale = 1.f / (2.f * sigma * sigma);
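The precomputed scale folds the Gaussian denominator into a single multiply: with scale = 1/(2*sigma^2), the per-pixel weight expf(-(dy^2 + dx^2) * scale) equals the usual exp(-(dx^2 + dy^2) / (2*sigma^2)) spatial window. A tiny reference in the same spirit:

// Sketch: Gaussian spatial weight as used in the voting loop above.
#include <cmath>

float gaussianWeightRef(float dx, float dy, float sigma)
{
    const float scale = 1.f / (2.f * sigma * sigma);  // hoisted out of the loop on the GPU
    return std::exp(-(dx * dx + dy * dy) * scale);
}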
@ -223,18 +223,18 @@ namespace cv { namespace gpu { namespace device
//

template<int size>
__device__ float reduce_smem(volatile float* smem)
{
unsigned int tid = threadIdx.x;
float sum = smem[tid];

if (size >= 512) { if (tid < 256) smem[tid] = sum = sum + smem[tid + 256]; __syncthreads(); }
if (size >= 256) { if (tid < 128) smem[tid] = sum = sum + smem[tid + 128]; __syncthreads(); }
if (size >= 128) { if (tid < 64) smem[tid] = sum = sum + smem[tid + 64]; __syncthreads(); }

if (tid < 32)
{
if (size >= 64) smem[tid] = sum = sum + smem[tid + 32];
if (size >= 32) smem[tid] = sum = sum + smem[tid + 16];
if (size >= 16) smem[tid] = sum = sum + smem[tid + 8];
@ -245,54 +245,54 @@ namespace cv { namespace gpu { namespace device

__syncthreads();
sum = smem[0];

return sum;
}

template <int nthreads, // Number of threads which process one block histogram
int nblocks> // Number of block histograms processed by one GPU thread block
__global__ void normalize_hists_kernel_many_blocks(const int block_hist_size,
const int img_block_width,
float* block_hists, float threshold)
{
if (blockIdx.x * blockDim.z + threadIdx.z >= img_block_width)
return;

float* hist = block_hists + (blockIdx.y * img_block_width +
blockIdx.x * blockDim.z + threadIdx.z) *
block_hist_size + threadIdx.x;

__shared__ float sh_squares[nthreads * nblocks];
float* squares = sh_squares + threadIdx.z * nthreads;

float elem = 0.f;
if (threadIdx.x < block_hist_size)
elem = hist[0];

squares[threadIdx.x] = elem * elem;

__syncthreads();
float sum = reduce_smem<nthreads>(squares);

float scale = 1.0f / (::sqrtf(sum) + 0.1f * block_hist_size);
elem = ::min(elem * scale, threshold);

__syncthreads();
squares[threadIdx.x] = elem * elem;

__syncthreads();
sum = reduce_smem<nthreads>(squares);
scale = 1.0f / (::sqrtf(sum) + 1e-3f);

if (threadIdx.x < block_hist_size)
hist[0] = elem * scale;
}

void normalize_hists(int nbins, int block_stride_x, int block_stride_y,
int height, int width, float* block_hists, float threshold)
{
const int nblocks = 1;

int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y;
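normalize_hists_kernel_many_blocks is the L2-Hys step from Dalal-Triggs HOG: L2-normalize the block histogram, clip every bin at the threshold, then renormalize. A serial host-side reference of that logic, using the same constants as the kernel:

// Sketch: serial L2-Hys block normalization matching the kernel above.
#include <algorithm>
#include <cmath>
#include <vector>

void l2HysRef(std::vector<float>& hist, float threshold)
{
    float sum = 0.f;
    for (float v : hist) sum += v * v;
    float scale = 1.f / (std::sqrt(sum) + 0.1f * hist.size());   // first L2 norm

    sum = 0.f;
    for (float& v : hist)
    {
        v = std::min(v * scale, threshold);                      // clip
        sum += v * v;
    }
    scale = 1.f / (std::sqrt(sum) + 1e-3f);                      // renormalize
    for (float& v : hist) v *= scale;
}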
@ -327,19 +327,19 @@ namespace cv { namespace gpu { namespace device
//

template <int nthreads, // Number of threads per one histogram block
int nblocks> // Number of histogram blocks processed by single GPU thread block
__global__ void classify_hists_kernel_many_blocks(const int img_win_width, const int img_block_width,
const int win_block_stride_x, const int win_block_stride_y,
const float* block_hists, const float* coefs,
float free_coef, float threshold, unsigned char* labels)
{
const int win_x = threadIdx.z;
if (blockIdx.x * blockDim.z + win_x >= img_win_width)
return;

const float* hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width +
blockIdx.x * win_block_stride_x * blockDim.z + win_x) *
cblock_hist_size;

float product = 0.f;
@ -357,24 +357,24 @@ namespace cv { namespace gpu { namespace device

__syncthreads();

if (nthreads >= 512)
{
if (threadIdx.x < 256) products[tid] = product = product + products[tid + 256];
__syncthreads();
}
if (nthreads >= 256)
{
if (threadIdx.x < 128) products[tid] = product = product + products[tid + 128];
__syncthreads();
}
if (nthreads >= 128)
{
if (threadIdx.x < 64) products[tid] = product = product + products[tid + 64];
__syncthreads();
}

if (threadIdx.x < 32)
{
volatile float* smem = products;
if (nthreads >= 64) smem[tid] = product = product + smem[tid + 32];
if (nthreads >= 32) smem[tid] = product = product + smem[tid + 16];
@ -389,10 +389,10 @@ namespace cv { namespace gpu { namespace device
}

void classify_hists(int win_height, int win_width, int block_stride_y, int block_stride_x,
int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
float* coefs, float free_coef, float threshold, unsigned char* labels)
{
const int nthreads = 256;
const int nblocks = 1;

@ -408,7 +408,7 @@ namespace cv { namespace gpu { namespace device

int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x;
classify_hists_kernel_many_blocks<nthreads, nblocks><<<grid, threads>>>(
img_win_width, img_block_width, win_block_stride_x, win_block_stride_y,
block_hists, coefs, free_coef, threshold, labels);
cudaSafeCall( cudaGetLastError() );
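What classify_hists parallelizes is just a linear SVM decision: the window descriptor is dotted with the trained coefficients, the free coefficient is added, and the comparison against the threshold labels the window. A serial reference of that decision:

// Sketch: the linear SVM decision that the kernel reduction computes per window.
unsigned char classifyWindowRef(const float* descriptor, const float* coefs,
                                int len, float free_coef, float threshold)
{
    float product = 0.f;
    for (int i = 0; i < len; ++i)
        product += descriptor[i] * coefs[i];   // dot(descriptor, coefs)
    return (unsigned char)((product + free_coef) >= threshold);
}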
@ -420,11 +420,11 @@ namespace cv { namespace gpu { namespace device

template <int nthreads>
__global__ void extract_descrs_by_rows_kernel(const int img_block_width, const int win_block_stride_x, const int win_block_stride_y,
const float* block_hists, PtrElemStepf descriptors)
{
// Get left top corner of the window in src
const float* hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width +
blockIdx.x * win_block_stride_x) * cblock_hist_size;

// Get left top corner of the window in dst
@ -440,7 +440,7 @@ namespace cv { namespace gpu { namespace device
}

void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x, int win_stride_y, int win_stride_x,
int height, int width, float* block_hists, DevMem2Df descriptors)
{
const int nthreads = 256;
@ -462,12 +462,12 @@ namespace cv { namespace gpu { namespace device

template <int nthreads>
__global__ void extract_descrs_by_cols_kernel(const int img_block_width, const int win_block_stride_x,
const int win_block_stride_y, const float* block_hists,
PtrElemStepf descriptors)
{
// Get left top corner of the window in src
const float* hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width +
blockIdx.x * win_block_stride_x) * cblock_hist_size;

// Get left top corner of the window in dst
@ -482,14 +482,14 @@ namespace cv { namespace gpu { namespace device
int y = block_idx / cnblocks_win_x;
int x = block_idx - y * cnblocks_win_x;

descriptor[(x * cnblocks_win_y + y) * cblock_hist_size + idx_in_block]
= hist[(y * img_block_width + x) * cblock_hist_size + idx_in_block];
}
}

void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x,
int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
DevMem2Df descriptors)
{
const int nthreads = 256;
@ -514,7 +514,7 @@ namespace cv { namespace gpu { namespace device

template <int nthreads, int correct_gamma>
__global__ void compute_gradients_8UC4_kernel(int height, int width, const PtrElemStep img,
float angle_scale, PtrElemStepf grad, PtrElemStep qangle)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
@ -524,9 +524,9 @@ namespace cv { namespace gpu { namespace device
__shared__ float sh_row[(nthreads + 2) * 3];

uchar4 val;
if (x < width)
val = row[x];
else
val = row[width - 2];

sh_row[threadIdx.x + 1] = val.x;
@ -563,9 +563,9 @@ namespace cv { namespace gpu { namespace device

float3 dx;
if (correct_gamma)
dx = make_float3(::sqrtf(b.x) - ::sqrtf(a.x), ::sqrtf(b.y) - ::sqrtf(a.y), ::sqrtf(b.z) - ::sqrtf(a.z));
else
dx = make_float3(b.x - a.x, b.y - a.y, b.z - a.z);

float3 dy = make_float3(0.f, 0.f, 0.f);

@ -588,7 +588,7 @@ namespace cv { namespace gpu { namespace device

float mag0 = dx.x * dx.x + dy.x * dy.x;
float mag1 = dx.y * dx.y + dy.y * dy.y;
if (mag0 < mag1)
{
best_dx = dx.y;
best_dy = dy.y;
@ -616,7 +616,7 @@ namespace cv { namespace gpu { namespace device
}

void compute_gradients_8UC4(int nbins, int height, int width, const DevMem2Db& img,
float angle_scale, DevMem2Df grad, DevMem2Db qangle, bool correct_gamma)
{
const int nthreads = 256;
@ -635,7 +635,7 @@ namespace cv { namespace gpu { namespace device
}

template <int nthreads, int correct_gamma>
__global__ void compute_gradients_8UC1_kernel(int height, int width, const PtrElemStep img,
float angle_scale, PtrElemStepf grad, PtrElemStep qangle)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
@ -644,9 +644,9 @@ namespace cv { namespace gpu { namespace device

__shared__ float sh_row[nthreads + 2];

if (x < width)
sh_row[threadIdx.x + 1] = row[x];
else
sh_row[threadIdx.x + 1] = row[width - 2];

if (threadIdx.x == 0)
@ -688,7 +688,7 @@ namespace cv { namespace gpu { namespace device
}

void compute_gradients_8UC1(int nbins, int height, int width, const DevMem2Db& img,
float angle_scale, DevMem2Df grad, DevMem2Db qangle, bool correct_gamma)
{
const int nthreads = 256;
@ -729,13 +729,13 @@ namespace cv { namespace gpu { namespace device
unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

if (x < dst.cols && y < dst.rows)
{
float4 val = tex2D(resize8UC4_tex, x * sx + colOfs, y * sy);
dst.ptr(y)[x] = make_uchar4(val.x * 255, val.y * 255, val.z * 255, val.w * 255);
}
}

template<class T, class TEX>
static void resize_for_hog(const DevMem2Db& src, DevMem2Db dst, TEX& tex)
{
tex.filterMode = cudaFilterModeLinear;
@ -743,19 +743,19 @@ namespace cv { namespace gpu { namespace device
size_t texOfs = 0;
int colOfs = 0;

cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
cudaSafeCall( cudaBindTexture2D(&texOfs, tex, src.data, desc, src.cols, src.rows, src.step) );

if (texOfs != 0)
{
colOfs = static_cast<int>( texOfs/sizeof(T) );
cudaSafeCall( cudaUnbindTexture(tex) );
cudaSafeCall( cudaBindTexture2D(&texOfs, tex, src.data, desc, src.cols, src.rows, src.step) );
}

dim3 threads(32, 8);
dim3 grid(divUp(dst.cols, threads.x), divUp(dst.rows, threads.y));

float sx = static_cast<float>(src.cols) / dst.cols;
float sy = static_cast<float>(src.rows) / dst.rows;

@ -769,5 +769,5 @@ namespace cv { namespace gpu { namespace device

void resize_8UC1(const DevMem2Db& src, DevMem2Db dst) { resize_for_hog<uchar> (src, dst, resize8UC1_tex); }
void resize_8UC4(const DevMem2Db& src, DevMem2Db dst) { resize_for_hog<uchar4>(src, dst, resize8UC4_tex); }
} // namespace hog
}}} // namespace cv { namespace gpu { namespace device
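In compute_gradients_8UC4_kernel, the correct_gamma template flag selects between differencing raw intensities and differencing their square roots; taking the square root first approximates gamma compression, which the Dalal-Triggs HOG paper reports as a small accuracy win. A scalar reference of the two paths:

// Sketch: per-channel horizontal gradient with optional sqrt gamma
// compression, mirroring the correct_gamma branch in the kernel.
#include <cmath>

float gradXRef(float left, float right, bool correct_gamma)
{
    if (correct_gamma)
        return std::sqrt(right) - std::sqrt(left);  // compress dynamic range first
    return right - left;
}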
@ -970,12 +970,12 @@ namespace cv { namespace gpu { namespace device
#undef IMPLEMENT_FILTER2D_TEX_READER

template <typename T, typename D>
void filter2D_gpu(DevMem2Db srcWhole, int ofsX, int ofsY, DevMem2Db dst,
int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel,
int borderMode, const float* borderValue, cudaStream_t stream)
{
typedef void (*func_t)(const DevMem2D_<T> srcWhole, int xoff, int yoff, DevMem2D_<D> dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream);
static const func_t funcs[] =
{
Filter2DCaller<T, D, BrdReflect101>::call,
Filter2DCaller<T, D, BrdReplicate>::call,
@ -50,9 +50,9 @@
#include "safe_call.hpp"
#include "opencv2/gpu/device/common.hpp"

namespace cv { namespace gpu
{
enum
{
BORDER_REFLECT101_GPU = 0,
BORDER_REPLICATE_GPU,
@ -60,7 +60,7 @@ namespace cv { namespace gpu
BORDER_REFLECT_GPU,
BORDER_WRAP_GPU
};

// Converts CPU border extrapolation mode into GPU internal analogue.
// Returns true if the GPU analogue exists, false otherwise.
bool tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType);
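tryConvertToGpuBorderType maps the public cv::BORDER_* constants onto this internal enum, whose order matches the caller tables above (REFLECT101, REPLICATE, CONSTANT, REFLECT, WRAP). A plausible sketch of such a mapping; the real table lives in the .cpp, may handle more cases, and the BORDER_CONSTANT_GPU enumerator is assumed from the hidden middle of the hunk:

// Sketch: assumed body for tryConvertToGpuBorderType, not a verbatim copy.
bool tryConvertToGpuBorderTypeRef(int cpuBorderType, int& gpuBorderType)
{
    switch (cpuBorderType)
    {
    case cv::BORDER_REFLECT101: gpuBorderType = BORDER_REFLECT101_GPU; return true;
    case cv::BORDER_REPLICATE:  gpuBorderType = BORDER_REPLICATE_GPU;  return true;
    case cv::BORDER_CONSTANT:   gpuBorderType = BORDER_CONSTANT_GPU;   return true;  // assumed enumerator
    case cv::BORDER_REFLECT:    gpuBorderType = BORDER_REFLECT_GPU;    return true;
    case cv::BORDER_WRAP:       gpuBorderType = BORDER_WRAP_GPU;       return true;
    }
    return false;
}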
@ -43,9 +43,9 @@
#include "internal_shared.hpp"
#include "opencv2/gpu/device/vec_math.hpp"

namespace cv { namespace gpu { namespace device
{
namespace match_template
{
__device__ __forceinline__ float sum(float v) { return v; }
__device__ __forceinline__ float sum(float2 v) { return v.x + v.y; }
@ -80,7 +80,7 @@ namespace cv { namespace gpu { namespace device
//////////////////////////////////////////////////////////////////////
// Naive_CCORR

template <typename T, int cn>
__global__ void matchTemplateNaiveKernel_CCORR(int w, int h, const PtrStepb image, const PtrStepb templ, DevMem2Df result)
{
typedef typename TypeVec<T, cn>::vec_type Type;
@ -122,7 +122,7 @@ namespace cv { namespace gpu { namespace device
{
typedef void (*caller_t)(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, cudaStream_t stream);

static const caller_t callers[] =
{
0, matchTemplateNaive_CCORR<float, 1>, matchTemplateNaive_CCORR<float, 2>, matchTemplateNaive_CCORR<float, 3>, matchTemplateNaive_CCORR<float, 4>
};
@ -135,7 +135,7 @@ namespace cv { namespace gpu { namespace device
{
typedef void (*caller_t)(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, cudaStream_t stream);

static const caller_t callers[] =
{
0, matchTemplateNaive_CCORR<uchar, 1>, matchTemplateNaive_CCORR<uchar, 2>, matchTemplateNaive_CCORR<uchar, 3>, matchTemplateNaive_CCORR<uchar, 4>
};
@ -192,7 +192,7 @@ namespace cv { namespace gpu { namespace device
{
typedef void (*caller_t)(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, cudaStream_t stream);

static const caller_t callers[] =
{
0, matchTemplateNaive_SQDIFF<float, 1>, matchTemplateNaive_SQDIFF<float, 2>, matchTemplateNaive_SQDIFF<float, 3>, matchTemplateNaive_SQDIFF<float, 4>
};
@ -204,7 +204,7 @@ namespace cv { namespace gpu { namespace device
{
typedef void (*caller_t)(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, cudaStream_t stream);

static const caller_t callers[] =
{
0, matchTemplateNaive_SQDIFF<uchar, 1>, matchTemplateNaive_SQDIFF<uchar, 2>, matchTemplateNaive_SQDIFF<uchar, 3>, matchTemplateNaive_SQDIFF<uchar, 4>
};
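The naive kernels compute, for every result pixel, a full sum over the template window; the callers arrays then dispatch on channel count (index 0 is unused, so cn indexes the table directly). A serial reference for single-channel CCORR:

// Sketch: naive single-channel cross-correlation, the arithmetic that each
// matchTemplateNaiveKernel_CCORR thread performs for one result pixel.
float ccorrAtRef(const float* image, int imgStride,
                 const float* templ, int w, int h, int x, int y)
{
    float s = 0.f;
    for (int i = 0; i < h; ++i)
        for (int j = 0; j < w; ++j)
            s += image[(y + i) * imgStride + (x + j)] * templ[i * w + j];
    return s;
}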
@ -249,7 +249,7 @@ namespace cv { namespace gpu { namespace device
{
typedef void (*caller_t)(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum, DevMem2Df result, cudaStream_t stream);

static const caller_t callers[] =
{
0, matchTemplatePrepared_SQDIFF_8U<1>, matchTemplatePrepared_SQDIFF_8U<2>, matchTemplatePrepared_SQDIFF_8U<3>, matchTemplatePrepared_SQDIFF_8U<4>
};
@ -321,7 +321,7 @@ namespace cv { namespace gpu { namespace device
DevMem2Df result, int cn, cudaStream_t stream)
{
typedef void (*caller_t)(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum, DevMem2Df result, cudaStream_t stream);
static const caller_t callers[] =
{
0, matchTemplatePrepared_SQDIFF_NORMED_8U<1>, matchTemplatePrepared_SQDIFF_NORMED_8U<2>, matchTemplatePrepared_SQDIFF_NORMED_8U<3>, matchTemplatePrepared_SQDIFF_NORMED_8U<4>
};
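The "prepared" SQDIFF variants avoid re-summing the template under every window: since sum((I-T)^2) = sum(I^2) - 2*sum(I*T) + sum(T^2), the kernel only needs the window's squared sum (from an integral image of squares), the template's squared sum (computed once), and the cross-correlation already stored in result. The identity in code form:

// Sketch: SQDIFF from precomputed sums, the identity behind the
// matchTemplatePrepared_SQDIFF_* kernels.
float sqdiffFromSumsRef(float image_sqsum_window,  // sum of I^2 over the window
                        float templ_sqsum,         // sum of T^2 (precomputed once)
                        float ccorr)               // sum of I*T (prior CCORR pass)
{
    return image_sqsum_window - 2.f * ccorr + templ_sqsum;
}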
@ -379,16 +379,16 @@ namespace cv { namespace gpu { namespace device
(image_sum_g.ptr(y + h)[x + w] - image_sum_g.ptr(y)[x + w]) -
(image_sum_g.ptr(y + h)[x] - image_sum_g.ptr(y)[x]));
float ccorr = result.ptr(y)[x];
result.ptr(y)[x] = ccorr - image_sum_r_ * templ_sum_scale_r
- image_sum_g_ * templ_sum_scale_g;
}
}

void matchTemplatePrepared_CCOFF_8UC2(
int w, int h,
const DevMem2D_<unsigned int> image_sum_r,
const DevMem2D_<unsigned int> image_sum_g,
unsigned int templ_sum_r, unsigned int templ_sum_g,
DevMem2Df result, cudaStream_t stream)
{
dim3 threads(32, 8);
@ -406,7 +406,7 @@ namespace cv { namespace gpu { namespace device

__global__ void matchTemplatePreparedKernel_CCOFF_8UC3(
int w, int h,
float templ_sum_scale_r,
float templ_sum_scale_g,
float templ_sum_scale_b,
@ -437,20 +437,20 @@ namespace cv { namespace gpu { namespace device
}

void matchTemplatePrepared_CCOFF_8UC3(
int w, int h,
const DevMem2D_<unsigned int> image_sum_r,
const DevMem2D_<unsigned int> image_sum_g,
const DevMem2D_<unsigned int> image_sum_b,
unsigned int templ_sum_r,
unsigned int templ_sum_g,
unsigned int templ_sum_b,
DevMem2Df result, cudaStream_t stream)
{
dim3 threads(32, 8);
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));

matchTemplatePreparedKernel_CCOFF_8UC3<<<grid, threads, 0, stream>>>(
w, h,
(float)templ_sum_r / (w * h),
(float)templ_sum_g / (w * h),
(float)templ_sum_b / (w * h),
@ -464,8 +464,8 @@ namespace cv { namespace gpu { namespace device

__global__ void matchTemplatePreparedKernel_CCOFF_8UC4(
int w, int h,
float templ_sum_scale_r,
float templ_sum_scale_g,
float templ_sum_scale_b,
float templ_sum_scale_a,
@ -493,7 +493,7 @@ namespace cv { namespace gpu { namespace device
(image_sum_a.ptr(y + h)[x + w] - image_sum_a.ptr(y)[x + w]) -
(image_sum_a.ptr(y + h)[x] - image_sum_a.ptr(y)[x]));
float ccorr = result.ptr(y)[x];
result.ptr(y)[x] = ccorr - image_sum_r_ * templ_sum_scale_r
- image_sum_g_ * templ_sum_scale_g
- image_sum_b_ * templ_sum_scale_b
- image_sum_a_ * templ_sum_scale_a;
@ -501,24 +501,24 @@ namespace cv { namespace gpu { namespace device
}

void matchTemplatePrepared_CCOFF_8UC4(
int w, int h,
const DevMem2D_<unsigned int> image_sum_r,
const DevMem2D_<unsigned int> image_sum_g,
const DevMem2D_<unsigned int> image_sum_b,
const DevMem2D_<unsigned int> image_sum_a,
unsigned int templ_sum_r,
unsigned int templ_sum_g,
unsigned int templ_sum_b,
unsigned int templ_sum_a,
DevMem2Df result, cudaStream_t stream)
{
dim3 threads(32, 8);
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));

matchTemplatePreparedKernel_CCOFF_8UC4<<<grid, threads, 0, stream>>>(
w, h,
(float)templ_sum_r / (w * h),
(float)templ_sum_g / (w * h),
(float)templ_sum_b / (w * h),
(float)templ_sum_a / (w * h),
image_sum_r, image_sum_g, image_sum_b, image_sum_a,
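The CCOFF kernels subtract the mean response using window sums pulled from an integral image: a window sum costs only four lookups, sum = S(y+h, x+w) - S(y, x+w) - S(y+h, x) + S(y, x), and the expression in the kernel groups exactly those four terms. A reference:

// Sketch: O(1) window sum from an integral image S, where S[y][x] holds the
// sum over the rectangle [0, y) x [0, x).
unsigned int windowSumRef(const unsigned int* S, int stride,
                          int x, int y, int w, int h)
{
    return S[(y + h) * stride + (x + w)] - S[y * stride + (x + w)]
         - S[(y + h) * stride + x]       + S[y * stride + x];
}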
@ -533,9 +533,9 @@ namespace cv { namespace gpu { namespace device
// Prepared_CCOFF_NORMED

__global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8U(
int w, int h, float weight,
float templ_sum_scale, float templ_sqsum_scale,
const PtrStep<unsigned int> image_sum,
const PtrStep<unsigned long long> image_sqsum,
DevMem2Df result)
{
@ -557,7 +557,7 @@ namespace cv { namespace gpu { namespace device
}

void matchTemplatePrepared_CCOFF_NORMED_8U(
int w, int h, const DevMem2D_<unsigned int> image_sum,
const DevMem2D_<unsigned long long> image_sqsum,
unsigned int templ_sum, unsigned long long templ_sqsum,
DevMem2Df result, cudaStream_t stream)
@ -570,7 +570,7 @@ namespace cv { namespace gpu { namespace device
float templ_sqsum_scale = templ_sqsum - weight * templ_sum * templ_sum;

matchTemplatePreparedKernel_CCOFF_NORMED_8U<<<grid, threads, 0, stream>>>(
w, h, weight, templ_sum_scale, templ_sqsum_scale,
image_sum, image_sqsum, result);
cudaSafeCall( cudaGetLastError() );

@ -581,8 +581,8 @@ namespace cv { namespace gpu { namespace device

__global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8UC2(
int w, int h, float weight,
float templ_sum_scale_r, float templ_sum_scale_g,
float templ_sqsum_scale,
const PtrStep<unsigned int> image_sum_r, const PtrStep<unsigned long long> image_sqsum_r,
const PtrStep<unsigned int> image_sum_g, const PtrStep<unsigned long long> image_sqsum_g,
@ -615,7 +615,7 @@ namespace cv { namespace gpu { namespace device
}

void matchTemplatePrepared_CCOFF_NORMED_8UC2(
int w, int h,
const DevMem2D_<unsigned int> image_sum_r, const DevMem2D_<unsigned long long> image_sqsum_r,
const DevMem2D_<unsigned int> image_sum_g, const DevMem2D_<unsigned long long> image_sqsum_g,
unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
@ -628,15 +628,15 @@ namespace cv { namespace gpu { namespace device
float weight = 1.f / (w * h);
float templ_sum_scale_r = templ_sum_r * weight;
float templ_sum_scale_g = templ_sum_g * weight;
float templ_sqsum_scale = templ_sqsum_r - weight * templ_sum_r * templ_sum_r
+ templ_sqsum_g - weight * templ_sum_g * templ_sum_g;

matchTemplatePreparedKernel_CCOFF_NORMED_8UC2<<<grid, threads, 0, stream>>>(
w, h, weight,
templ_sum_scale_r, templ_sum_scale_g,
templ_sqsum_scale,
image_sum_r, image_sqsum_r,
image_sum_g, image_sqsum_g,
result);
cudaSafeCall( cudaGetLastError() );

@ -647,8 +647,8 @@ namespace cv { namespace gpu { namespace device

__global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8UC3(
int w, int h, float weight,
float templ_sum_scale_r, float templ_sum_scale_g, float templ_sum_scale_b,
float templ_sqsum_scale,
const PtrStep<unsigned int> image_sum_r, const PtrStep<unsigned long long> image_sqsum_r,
const PtrStep<unsigned int> image_sum_g, const PtrStep<unsigned long long> image_sqsum_g,
@ -690,7 +690,7 @@ namespace cv { namespace gpu { namespace device
}

void matchTemplatePrepared_CCOFF_NORMED_8UC3(
int w, int h,
const DevMem2D_<unsigned int> image_sum_r, const DevMem2D_<unsigned long long> image_sqsum_r,
const DevMem2D_<unsigned int> image_sum_g, const DevMem2D_<unsigned long long> image_sqsum_g,
const DevMem2D_<unsigned int> image_sum_b, const DevMem2D_<unsigned long long> image_sqsum_b,
@ -706,17 +706,17 @@ namespace cv { namespace gpu { namespace device
float templ_sum_scale_r = templ_sum_r * weight;
float templ_sum_scale_g = templ_sum_g * weight;
float templ_sum_scale_b = templ_sum_b * weight;
float templ_sqsum_scale = templ_sqsum_r - weight * templ_sum_r * templ_sum_r
+ templ_sqsum_g - weight * templ_sum_g * templ_sum_g
+ templ_sqsum_b - weight * templ_sum_b * templ_sum_b;

matchTemplatePreparedKernel_CCOFF_NORMED_8UC3<<<grid, threads, 0, stream>>>(
w, h, weight,
templ_sum_scale_r, templ_sum_scale_g, templ_sum_scale_b,
templ_sqsum_scale,
image_sum_r, image_sqsum_r,
image_sum_g, image_sqsum_g,
image_sum_b, image_sqsum_b,
result);
cudaSafeCall( cudaGetLastError() );

@ -727,8 +727,8 @@ namespace cv { namespace gpu { namespace device

__global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8UC4(
int w, int h, float weight,
float templ_sum_scale_r, float templ_sum_scale_g, float templ_sum_scale_b,
float templ_sum_scale_a, float templ_sqsum_scale,
const PtrStep<unsigned int> image_sum_r, const PtrStep<unsigned long long> image_sqsum_r,
const PtrStep<unsigned int> image_sum_g, const PtrStep<unsigned long long> image_sqsum_g,
@ -777,7 +777,7 @@ namespace cv { namespace gpu { namespace device
}

void matchTemplatePrepared_CCOFF_NORMED_8UC4(
int w, int h,
const DevMem2D_<unsigned int> image_sum_r, const DevMem2D_<unsigned long long> image_sqsum_r,
const DevMem2D_<unsigned int> image_sum_g, const DevMem2D_<unsigned long long> image_sqsum_g,
const DevMem2D_<unsigned int> image_sum_b, const DevMem2D_<unsigned long long> image_sqsum_b,
@ -802,13 +802,13 @@ namespace cv { namespace gpu { namespace device
+ templ_sqsum_a - weight * templ_sum_a * templ_sum_a;

matchTemplatePreparedKernel_CCOFF_NORMED_8UC4<<<grid, threads, 0, stream>>>(
w, h, weight,
templ_sum_scale_r, templ_sum_scale_g, templ_sum_scale_b, templ_sum_scale_a,
templ_sqsum_scale,
image_sum_r, image_sqsum_r,
image_sum_g, image_sqsum_g,
image_sum_b, image_sqsum_b,
image_sum_a, image_sqsum_a,
result);
cudaSafeCall( cudaGetLastError() );

@ -821,7 +821,7 @@ namespace cv { namespace gpu { namespace device

template <int cn>
__global__ void normalizeKernel_8U(
int w, int h, const PtrStep<unsigned long long> image_sqsum,
unsigned long long templ_sqsum, DevMem2Df result)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
@ -836,7 +836,7 @@ namespace cv { namespace gpu { namespace device
}
}

void normalize_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum,
unsigned long long templ_sqsum, DevMem2Df result, int cn, cudaStream_t stream)
{
dim3 threads(32, 8);
mathfunc.cu

@ -42,9 +42,9 @@

#include "internal_shared.hpp"

namespace cv { namespace gpu { namespace device
{
namespace mathfunc
{
//////////////////////////////////////////////////////////////////////////////////////
// Cart <-> Polar
@ -79,7 +79,7 @@ namespace cv { namespace gpu { namespace device
}
};
template <typename Mag, typename Angle>
__global__ void cartToPolar(const float* xptr, size_t x_step, const float* yptr, size_t y_step,
float* mag, size_t mag_step, float* angle, size_t angle_step, float scale, int width, int height)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
@ -137,11 +137,11 @@ namespace cv { namespace gpu { namespace device

grid.x = divUp(x.cols, threads.x);
grid.y = divUp(x.rows, threads.y);

const float scale = angleInDegrees ? (float)(180.0f / CV_PI) : 1.f;

cartToPolar<Mag, Angle><<<grid, threads, 0, stream>>>(
x.data, x.step/x.elemSize(), y.data, y.step/y.elemSize(),
mag.data, mag.step/mag.elemSize(), angle.data, angle.step/angle.elemSize(), scale, x.cols, x.rows);
cudaSafeCall( cudaGetLastError() );

@ -152,7 +152,7 @@ namespace cv { namespace gpu { namespace device
void cartToPolar_gpu(DevMem2Df x, DevMem2Df y, DevMem2Df mag, bool magSqr, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream)
{
typedef void (*caller_t)(DevMem2Df x, DevMem2Df y, DevMem2Df mag, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream);
static const caller_t callers[2][2][2] =
{
{
@ -187,10 +187,10 @@ namespace cv { namespace gpu { namespace device

grid.x = divUp(mag.cols, threads.x);
grid.y = divUp(mag.rows, threads.y);

const float scale = angleInDegrees ? (float)(CV_PI / 180.0f) : 1.0f;

polarToCart<Mag><<<grid, threads, 0, stream>>>(mag.data, mag.step/mag.elemSize(),
angle.data, angle.step/angle.elemSize(), scale, x.data, x.step/x.elemSize(), y.data, y.step/y.elemSize(), mag.cols, mag.rows);
cudaSafeCall( cudaGetLastError() );

@ -201,7 +201,7 @@ namespace cv { namespace gpu { namespace device
void polarToCart_gpu(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream)
{
typedef void (*caller_t)(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream);
static const caller_t callers[2] =
{
polarToCart_caller<NonEmptyMag>,
polarToCart_caller<EmptyMag>
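For reference, the per-element math behind cartToPolar: the magnitude and angle of each (x, y) pair, with the scale factor above converting radians to degrees when angleInDegrees is set. A minimal scalar sketch, not part of the commit, ignoring any wrap of the angle into [0, 2*pi) that the device code may apply:

#include <cmath>

void cartToPolarOne(float x, float y, bool angleInDegrees, float& mag, float& angle)
{
    mag = ::hypotf(x, y);              // sqrt(x*x + y*y)
    angle = ::atan2f(y, x);            // radians in (-pi, pi]
    if (angleInDegrees)
        angle *= 180.0f / 3.14159265f; // same role as the 'scale' factor above
}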
matrix_reductions.cu

@ -45,9 +45,9 @@
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/vec_math.hpp"

namespace cv { namespace gpu { namespace device
{
namespace matrix_reductions
{
// Performs reduction in shared memory
template <int size, typename T>
@ -74,19 +74,19 @@ namespace cv { namespace gpu { namespace device
{
explicit Mask8U(PtrStepb mask): mask(mask) {}

__device__ __forceinline__ bool operator()(int y, int x) const
{
return mask.ptr(y)[x];
}

PtrStepb mask;
};

struct MaskTrue
{
__device__ __forceinline__ bool operator()(int y, int x) const
{
return true;
}
__device__ __forceinline__ MaskTrue(){}
__device__ __forceinline__ MaskTrue(const MaskTrue& mask_){}
@ -95,7 +95,7 @@ namespace cv { namespace gpu { namespace device
//////////////////////////////////////////////////////////////////////////////
// Min max

// To avoid shared bank conflicts we convert each value into value of
// appropriate type (32 bits minimum)
template <typename T> struct MinMaxTypeTraits {};
template <> struct MinMaxTypeTraits<uchar> { typedef int best_type; };
@ -106,7 +106,7 @@ namespace cv { namespace gpu { namespace device
template <> struct MinMaxTypeTraits<float> { typedef float best_type; };
template <> struct MinMaxTypeTraits<double> { typedef double best_type; };

namespace minmax
{
__constant__ int ctwidth;
__constant__ int ctheight;
@ -131,19 +131,19 @@ namespace cv { namespace gpu { namespace device
{
dim3 threads, grid;
estimateThreadCfg(cols, rows, threads, grid);
bufcols = grid.x * grid.y * elem_size;
bufrows = 2;
}

// Estimates device constants which are used in the kernels using specified thread configuration
void setKernelConsts(int cols, int rows, const dim3& threads, const dim3& grid)
{
int twidth = divUp(divUp(cols, grid.x), threads.x);
int theight = divUp(divUp(rows, grid.y), threads.y);
cudaSafeCall(cudaMemcpyToSymbol(ctwidth, &twidth, sizeof(ctwidth)));
cudaSafeCall(cudaMemcpyToSymbol(ctheight, &theight, sizeof(ctheight)));
}

// Does min and max in shared memory
@ -195,10 +195,10 @@ namespace cv { namespace gpu { namespace device
for (uint x = x0; x < x_end; x += blockDim.x)
{
T val = src_row[x];
if (mask(y, x))
{
mymin = ::min(mymin, val);
mymax = ::max(mymax, val);
}
}
}
@ -209,7 +209,7 @@ namespace cv { namespace gpu { namespace device

findMinMaxInSmem<nthreads, best_type>(sminval, smaxval, tid);

if (tid == 0)
{
minval[blockIdx.y * gridDim.x + blockIdx.x] = (T)sminval[0];
maxval[blockIdx.y * gridDim.x + blockIdx.x] = (T)smaxval[0];
@ -240,7 +240,7 @@ namespace cv { namespace gpu { namespace device

findMinMaxInSmem<nthreads, best_type>(sminval, smaxval, tid);

if (tid == 0)
{
minval[0] = (T)sminval[0];
maxval[0] = (T)smaxval[0];
@ -248,7 +248,7 @@ namespace cv { namespace gpu { namespace device
}
}
#else
if (tid == 0)
{
minval[blockIdx.y * gridDim.x + blockIdx.x] = (T)sminval[0];
maxval[blockIdx.y * gridDim.x + blockIdx.x] = (T)smaxval[0];
@ -256,7 +256,7 @@ namespace cv { namespace gpu { namespace device
#endif
}

template <typename T>
void minMaxMaskCaller(const DevMem2Db src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf)
{
@ -277,7 +277,7 @@ namespace cv { namespace gpu { namespace device
cudaSafeCall( cudaMemcpy(&maxval_, maxval_buf, sizeof(T), cudaMemcpyDeviceToHost) );
*minval = minval_;
*maxval = maxval_;
}

template void minMaxMaskCaller<uchar>(const DevMem2Db, const PtrStepb, double*, double*, PtrStepb);
template void minMaxMaskCaller<char>(const DevMem2Db, const PtrStepb, double*, double*, PtrStepb);
@ -308,7 +308,7 @@ namespace cv { namespace gpu { namespace device
cudaSafeCall( cudaMemcpy(&maxval_, maxval_buf, sizeof(T), cudaMemcpyDeviceToHost) );
*minval = minval_;
*maxval = maxval_;
}

template void minMaxCaller<uchar>(const DevMem2Db, double*, double*, PtrStepb);
template void minMaxCaller<char>(const DevMem2Db, double*, double*, PtrStepb);
@ -325,7 +325,7 @@ namespace cv { namespace gpu { namespace device
typedef typename MinMaxTypeTraits<T>::best_type best_type;
__shared__ best_type sminval[nthreads];
__shared__ best_type smaxval[nthreads];

uint tid = threadIdx.y * blockDim.x + threadIdx.x;
uint idx = ::min(tid, size - 1);

@ -335,7 +335,7 @@ namespace cv { namespace gpu { namespace device

findMinMaxInSmem<nthreads, best_type>(sminval, smaxval, tid);

if (tid == 0)
{
minval[0] = (T)sminval[0];
maxval[0] = (T)smaxval[0];
@ -410,7 +410,7 @@ namespace cv { namespace gpu { namespace device
///////////////////////////////////////////////////////////////////////////////
// minMaxLoc

namespace minmaxloc
{
__constant__ int ctwidth;
__constant__ int ctheight;
@ -431,7 +431,7 @@ namespace cv { namespace gpu { namespace device

// Returns required buffer sizes
void getBufSizeRequired(int cols, int rows, int elem_size, int& b1cols,
int& b1rows, int& b2cols, int& b2rows)
{
dim3 threads, grid;
@ -445,16 +445,16 @@ namespace cv { namespace gpu { namespace device

// Estimates device constants which are used in the kernels using specified thread configuration
void setKernelConsts(int cols, int rows, const dim3& threads, const dim3& grid)
{
int twidth = divUp(divUp(cols, grid.x), threads.x);
int theight = divUp(divUp(rows, grid.y), threads.y);
cudaSafeCall(cudaMemcpyToSymbol(ctwidth, &twidth, sizeof(ctwidth)));
cudaSafeCall(cudaMemcpyToSymbol(ctheight, &theight, sizeof(ctheight)));
}

template <typename T>
__device__ void merge(uint tid, uint offset, volatile T* minval, volatile T* maxval,
volatile uint* minloc, volatile uint* maxloc)
{
T val = minval[tid + offset];
@ -473,7 +473,7 @@ namespace cv { namespace gpu { namespace device

template <int size, typename T>
__device__ void findMinMaxLocInSmem(volatile T* minval, volatile T* maxval, volatile uint* minloc,
volatile uint* maxloc, const uint tid)
{
if (size >= 512) { if (tid < 256) { merge(tid, 256, minval, maxval, minloc, maxloc); } __syncthreads(); }
@ -493,7 +493,7 @@ namespace cv { namespace gpu { namespace device

template <int nthreads, typename T, typename Mask>
__global__ void minMaxLocKernel(const DevMem2Db src, Mask mask, T* minval, T* maxval,
uint* minloc, uint* maxloc)
{
typedef typename MinMaxTypeTraits<T>::best_type best_type;
@ -507,7 +507,7 @@ namespace cv { namespace gpu { namespace device
uint tid = threadIdx.y * blockDim.x + threadIdx.x;

T mymin = numeric_limits<T>::max();
T mymax = numeric_limits<T>::is_signed ? -numeric_limits<T>::max() : numeric_limits<T>::min();
uint myminloc = 0;
uint mymaxloc = 0;
uint y_end = ::min(y0 + (ctheight - 1) * blockDim.y + 1, src.rows);
@ -527,7 +527,7 @@ namespace cv { namespace gpu { namespace device
}
}

sminval[tid] = mymin;
smaxval[tid] = mymax;
sminloc[tid] = myminloc;
smaxloc[tid] = mymaxloc;
@ -564,7 +564,7 @@ namespace cv { namespace gpu { namespace device

findMinMaxLocInSmem<nthreads, best_type>(sminval, smaxval, sminloc, smaxloc, tid);

if (tid == 0)
{
minval[0] = (T)sminval[0];
maxval[0] = (T)smaxval[0];
@ -574,7 +574,7 @@ namespace cv { namespace gpu { namespace device
}
}
#else
if (tid == 0)
{
minval[blockIdx.y * gridDim.x + blockIdx.x] = (T)sminval[0];
maxval[blockIdx.y * gridDim.x + blockIdx.x] = (T)smaxval[0];
@ -586,7 +586,7 @@ namespace cv { namespace gpu { namespace device

template <typename T>
void minMaxLocMaskCaller(const DevMem2Db src, const PtrStepb mask, double* minval, double* maxval,
int minloc[2], int maxloc[2], PtrStepb valbuf, PtrStepb locbuf)
{
dim3 threads, grid;
@ -598,7 +598,7 @@ namespace cv { namespace gpu { namespace device
uint* minloc_buf = (uint*)locbuf.ptr(0);
uint* maxloc_buf = (uint*)locbuf.ptr(1);

minMaxLocKernel<256, T, Mask8U><<<grid, threads>>>(src, Mask8U(mask), minval_buf, maxval_buf,
minloc_buf, maxloc_buf);
cudaSafeCall( cudaGetLastError() );

@ -627,7 +627,7 @@ namespace cv { namespace gpu { namespace device

template <typename T>
void minMaxLocCaller(const DevMem2Db src, double* minval, double* maxval,
int minloc[2], int maxloc[2], PtrStepb valbuf, PtrStepb locbuf)
{
dim3 threads, grid;
@ -639,7 +639,7 @@ namespace cv { namespace gpu { namespace device
uint* minloc_buf = (uint*)locbuf.ptr(0);
uint* maxloc_buf = (uint*)locbuf.ptr(1);

minMaxLocKernel<256, T, MaskTrue><<<grid, threads>>>(src, MaskTrue(), minval_buf, maxval_buf,
minloc_buf, maxloc_buf);
cudaSafeCall( cudaGetLastError() );

@ -688,7 +688,7 @@ namespace cv { namespace gpu { namespace device

findMinMaxLocInSmem<nthreads, best_type>(sminval, smaxval, sminloc, smaxloc, tid);

if (tid == 0)
{
minval[0] = (T)sminval[0];
maxval[0] = (T)smaxval[0];
@ -699,7 +699,7 @@ namespace cv { namespace gpu { namespace device

template <typename T>
void minMaxLocMaskMultipassCaller(const DevMem2Db src, const PtrStepb mask, double* minval, double* maxval,
int minloc[2], int maxloc[2], PtrStepb valbuf, PtrStepb locbuf)
{
dim3 threads, grid;
@ -711,7 +711,7 @@ namespace cv { namespace gpu { namespace device
uint* minloc_buf = (uint*)locbuf.ptr(0);
uint* maxloc_buf = (uint*)locbuf.ptr(1);

minMaxLocKernel<256, T, Mask8U><<<grid, threads>>>(src, Mask8U(mask), minval_buf, maxval_buf,
minloc_buf, maxloc_buf);
cudaSafeCall( cudaGetLastError() );
minMaxLocPass2Kernel<256, T><<<1, 256>>>(minval_buf, maxval_buf, minloc_buf, maxloc_buf, grid.x * grid.y);
@ -741,7 +741,7 @@ namespace cv { namespace gpu { namespace device

template <typename T>
void minMaxLocMultipassCaller(const DevMem2Db src, double* minval, double* maxval,
int minloc[2], int maxloc[2], PtrStepb valbuf, PtrStepb locbuf)
{
dim3 threads, grid;
@ -753,7 +753,7 @@ namespace cv { namespace gpu { namespace device
uint* minloc_buf = (uint*)locbuf.ptr(0);
uint* maxloc_buf = (uint*)locbuf.ptr(1);

minMaxLocKernel<256, T, MaskTrue><<<grid, threads>>>(src, MaskTrue(), minval_buf, maxval_buf,
minloc_buf, maxloc_buf);
cudaSafeCall( cudaGetLastError() );
minMaxLocPass2Kernel<256, T><<<1, 256>>>(minval_buf, maxval_buf, minloc_buf, maxloc_buf, grid.x * grid.y);
@ -785,7 +785,7 @@ namespace cv { namespace gpu { namespace device
//////////////////////////////////////////////////////////////////////////////////////////////////////////
// countNonZero

namespace countnonzero
{
__constant__ int ctwidth;
__constant__ int ctheight;
@ -811,11 +811,11 @@ namespace cv { namespace gpu { namespace device

void setKernelConsts(int cols, int rows, const dim3& threads, const dim3& grid)
{
int twidth = divUp(divUp(cols, grid.x), threads.x);
int theight = divUp(divUp(rows, grid.y), threads.y);
cudaSafeCall(cudaMemcpyToSymbol(ctwidth, &twidth, sizeof(twidth)));
cudaSafeCall(cudaMemcpyToSymbol(ctheight, &theight, sizeof(theight)));
}

@ -862,7 +862,7 @@ namespace cv { namespace gpu { namespace device

sumInSmem<nthreads, uint>(scount, tid);

if (tid == 0)
{
count[0] = scount[0];
blocks_finished = 0;
@ -873,7 +873,7 @@ namespace cv { namespace gpu { namespace device
#endif
}

template <typename T>
int countNonZeroCaller(const DevMem2Db src, PtrStepb buf)
{
@ -890,9 +890,9 @@ namespace cv { namespace gpu { namespace device

uint count;
cudaSafeCall(cudaMemcpy(&count, count_buf, sizeof(int), cudaMemcpyDeviceToHost));

return count;
}

template int countNonZeroCaller<uchar>(const DevMem2Db, PtrStepb);
template int countNonZeroCaller<char>(const DevMem2Db, PtrStepb);
@ -914,7 +914,7 @@ namespace cv { namespace gpu { namespace device

sumInSmem<nthreads, uint>(scount, tid);

if (tid == 0)
count[0] = scount[0];
}

@ -937,9 +937,9 @@ namespace cv { namespace gpu { namespace device

uint count;
cudaSafeCall(cudaMemcpy(&count, count_buf, sizeof(int), cudaMemcpyDeviceToHost));

return count;
}

template int countNonZeroMultipassCaller<uchar>(const DevMem2Db, PtrStepb);
template int countNonZeroMultipassCaller<char>(const DevMem2Db, PtrStepb);
@ -965,16 +965,16 @@ namespace cv { namespace gpu { namespace device
template <> struct SumType<float> { typedef float R; };
template <> struct SumType<double> { typedef double R; };

template <typename R>
struct IdentityOp { static __device__ __forceinline__ R call(R x) { return x; } };

template <typename R>
struct AbsOp { static __device__ __forceinline__ R call(R x) { return ::abs(x); } };

template <>
struct AbsOp<uint> { static __device__ __forceinline__ uint call(uint x) { return x; } };

template <typename R>
struct SqrOp { static __device__ __forceinline__ R call(R x) { return x * x; } };

__constant__ int ctwidth;
@ -987,7 +987,7 @@ namespace cv { namespace gpu { namespace device
void estimateThreadCfg(int cols, int rows, dim3& threads, dim3& grid)
{
threads = dim3(threads_x, threads_y);
grid = dim3(divUp(cols, threads.x * threads.y),
divUp(rows, threads.y * threads.x));
grid.x = std::min(grid.x, threads.x);
grid.y = std::min(grid.y, threads.y);
@ -1004,11 +1004,11 @@ namespace cv { namespace gpu { namespace device

void setKernelConsts(int cols, int rows, const dim3& threads, const dim3& grid)
{
int twidth = divUp(divUp(cols, grid.x), threads.x);
int theight = divUp(divUp(rows, grid.y), threads.y);
cudaSafeCall(cudaMemcpyToSymbol(ctwidth, &twidth, sizeof(twidth)));
cudaSafeCall(cudaMemcpyToSymbol(ctheight, &theight, sizeof(theight)));
}

template <typename T, typename R, typename Op, int nthreads>
@ -1055,7 +1055,7 @@ namespace cv { namespace gpu { namespace device

sumInSmem<nthreads, R>(smem, tid);

if (tid == 0)
{
result[0] = smem[0];
blocks_finished = 0;
@ -1078,7 +1078,7 @@ namespace cv { namespace gpu { namespace device

sumInSmem<nthreads, R>(smem, tid);

if (tid == 0)
result[0] = smem[0];
}

@ -1142,7 +1142,7 @@ namespace cv { namespace gpu { namespace device
sumInSmem<nthreads, R>(smem, tid);
sumInSmem<nthreads, R>(smem + nthreads, tid);

if (tid == 0)
{
res.x = smem[0];
res.y = smem[nthreads];
@ -1151,7 +1151,7 @@ namespace cv { namespace gpu { namespace device
}
}
#else
if (tid == 0)
{
DstType res;
res.x = smem[0];
@ -1179,7 +1179,7 @@ namespace cv { namespace gpu { namespace device
sumInSmem<nthreads, R>(smem, tid);
sumInSmem<nthreads, R>(smem + nthreads, tid);

if (tid == 0)
{
res.x = smem[0];
res.y = smem[nthreads];
@ -1252,7 +1252,7 @@ namespace cv { namespace gpu { namespace device
sumInSmem<nthreads, R>(smem + nthreads, tid);
sumInSmem<nthreads, R>(smem + 2 * nthreads, tid);

if (tid == 0)
{
res.x = smem[0];
res.y = smem[nthreads];
@ -1262,7 +1262,7 @@ namespace cv { namespace gpu { namespace device
}
}
#else
if (tid == 0)
{
DstType res;
res.x = smem[0];
@ -1293,7 +1293,7 @@ namespace cv { namespace gpu { namespace device
sumInSmem<nthreads, R>(smem + nthreads, tid);
sumInSmem<nthreads, R>(smem + 2 * nthreads, tid);

if (tid == 0)
{
res.x = smem[0];
res.y = smem[nthreads];
@ -1323,7 +1323,7 @@ namespace cv { namespace gpu { namespace device
for (int x = 0; x < ctwidth && x0 + x * blockDim.x < src.cols; ++x)
{
val = ptr[x0 + x * blockDim.x];
sum = sum + VecTraits<DstType>::make(Op::call(val.x), Op::call(val.y),
Op::call(val.z), Op::call(val.w));
}
}
@ -1372,7 +1372,7 @@ namespace cv { namespace gpu { namespace device
sumInSmem<nthreads, R>(smem + 2 * nthreads, tid);
sumInSmem<nthreads, R>(smem + 3 * nthreads, tid);

if (tid == 0)
{
res.x = smem[0];
res.y = smem[nthreads];
@ -1383,7 +1383,7 @@ namespace cv { namespace gpu { namespace device
}
}
#else
if (tid == 0)
{
DstType res;
res.x = smem[0];
@ -1417,7 +1417,7 @@ namespace cv { namespace gpu { namespace device
sumInSmem<nthreads, R>(smem + 2 * nthreads, tid);
sumInSmem<nthreads, R>(smem + 3 * nthreads, tid);

if (tid == 0)
{
res.x = smem[0];
res.y = smem[nthreads];
@ -1488,7 +1488,7 @@ namespace cv { namespace gpu { namespace device
sum[1] = result[1];
sum[2] = result[2];
sum[3] = result[3];
}

template void sumMultipassCaller<uchar>(const DevMem2Db, PtrStepb, double*, int);
template void sumMultipassCaller<char>(const DevMem2Db, PtrStepb, double*, int);
@ -1537,7 +1537,7 @@ namespace cv { namespace gpu { namespace device
sum[1] = result[1];
sum[2] = result[2];
sum[3] = result[3];
}

template void sumCaller<uchar>(const DevMem2Db, PtrStepb, double*, int);
template void sumCaller<char>(const DevMem2Db, PtrStepb, double*, int);
@ -1608,7 +1608,7 @@ namespace cv { namespace gpu { namespace device
sum[1] = result[1];
sum[2] = result[2];
sum[3] = result[3];
}

template void absSumMultipassCaller<uchar>(const DevMem2Db, PtrStepb, double*, int);
template void absSumMultipassCaller<char>(const DevMem2Db, PtrStepb, double*, int);
@ -1728,7 +1728,7 @@ namespace cv { namespace gpu { namespace device
sum[1] = result[1];
sum[2] = result[2];
sum[3] = result[3];
}

template void sqrSumMultipassCaller<uchar>(const DevMem2Db, PtrStepb, double*, int);
template void sqrSumMultipassCaller<char>(const DevMem2Db, PtrStepb, double*, int);
@ -1894,7 +1894,7 @@ namespace cv { namespace gpu { namespace device
{
for (int y = threadIdx.y; y < src.rows; y += 16)
myVal = op(myVal, src.ptr(y)[x]);
}

smem[threadIdx.x * 16 + threadIdx.y] = myVal;
__syncthreads();
@ -1931,11 +1931,11 @@ namespace cv { namespace gpu { namespace device
{
typedef void (*caller_t)(const DevMem2D_<T>& src, DevMem2D_<D> dst, cudaStream_t stream);

static const caller_t callers[] =
{
reduceRows_caller<SumReductor, T, S, D>,
reduceRows_caller<AvgReductor, T, S, D>,
reduceRows_caller<MaxReductor, T, S, D>,
reduceRows_caller<MinReductor, T, S, D>
};

@ -1944,15 +1944,15 @@ namespace cv { namespace gpu { namespace device

template void reduceRows_gpu<uchar, int, uchar>(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
template void reduceRows_gpu<uchar, int, int>(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
template void reduceRows_gpu<uchar, int, float>(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);

template void reduceRows_gpu<ushort, int, ushort>(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
template void reduceRows_gpu<ushort, int, int>(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
template void reduceRows_gpu<ushort, int, float>(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);

template void reduceRows_gpu<short, int, short>(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
template void reduceRows_gpu<short, int, int>(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
template void reduceRows_gpu<short, int, float>(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);

template void reduceRows_gpu<int, int, int>(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
template void reduceRows_gpu<int, int, float>(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
@ -2068,7 +2068,7 @@ namespace cv { namespace gpu { namespace device
{
typedef void (*caller_t)(const DevMem2D_<T>& src, DevMem2D_<D> dst, cudaStream_t stream);

static const caller_t callers[4][4] =
{
{reduceCols_caller<1, SumReductor, T, S, D>, reduceCols_caller<1, AvgReductor, T, S, D>, reduceCols_caller<1, MaxReductor, T, S, D>, reduceCols_caller<1, MinReductor, T, S, D>},
{reduceCols_caller<2, SumReductor, T, S, D>, reduceCols_caller<2, AvgReductor, T, S, D>, reduceCols_caller<2, MaxReductor, T, S, D>, reduceCols_caller<2, MinReductor, T, S, D>},
@ -2083,15 +2083,15 @@ namespace cv { namespace gpu { namespace device
template void reduceCols_gpu<uchar, int, int>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
template void reduceCols_gpu<uchar, int, float>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);

template void reduceCols_gpu<ushort, int, ushort>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
template void reduceCols_gpu<ushort, int, int>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
template void reduceCols_gpu<ushort, int, float>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);

template void reduceCols_gpu<short, int, short>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
template void reduceCols_gpu<short, int, int>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
template void reduceCols_gpu<short, int, float>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);

template void reduceCols_gpu<int, int, int>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
template void reduceCols_gpu<int, int, float>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);

template void reduceCols_gpu<float, float, float>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
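sumInSmem and findMinMaxInSmem above are instances of the classic shared-memory tree reduction: each step halves the active threads and barriers, and the final warp runs without __syncthreads() because, on the hardware this code targeted, a warp executes in lockstep over volatile shared memory. A minimal sum-only sketch of the pattern, not the commit's code:

template <int size, typename T>
__device__ void sumInSmemSketch(volatile T* smem, unsigned int tid)
{
    if (size >= 512) { if (tid < 256) smem[tid] += smem[tid + 256]; __syncthreads(); }
    if (size >= 256) { if (tid < 128) smem[tid] += smem[tid + 128]; __syncthreads(); }
    if (size >= 128) { if (tid < 64)  smem[tid] += smem[tid + 64];  __syncthreads(); }

    if (tid < 32) // last warp: no barrier needed on pre-Volta SIMT hardware
    {
        if (size >= 64) smem[tid] += smem[tid + 32];
        if (size >= 32) smem[tid] += smem[tid + 16];
        if (size >= 16) smem[tid] += smem[tid + 8];
        if (size >= 8)  smem[tid] += smem[tid + 4];
        if (size >= 4)  smem[tid] += smem[tid + 2];
        if (size >= 2)  smem[tid] += smem[tid + 1];
    }
}

The result is left in smem[0], and only thread 0 reads it back, which is exactly what the repeated 'if (tid == 0)' blocks in the hunks above do.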
optical_flow.cu

@ -42,7 +42,7 @@

#include "opencv2/gpu/device/common.hpp"

namespace cv { namespace gpu { namespace device
{
namespace optical_flow
{
@ -50,7 +50,7 @@ namespace cv { namespace gpu { namespace device
#define NUM_VERTS_PER_ARROW 6

__global__ void NeedleMapAverageKernel(const DevMem2Df u, const PtrStepf v, PtrStepf u_avg, PtrStepf v_avg)
{
__shared__ float smem[2 * NEEDLE_MAP_SCALE];

volatile float* u_col_sum = smem;
@ -70,7 +70,7 @@ namespace cv { namespace gpu { namespace device
}

if (threadIdx.x < 8)
{
// now add the column sums
const uint X = threadIdx.x;

@ -80,8 +80,8 @@ namespace cv { namespace gpu { namespace device
v_col_sum[threadIdx.x] += v_col_sum[threadIdx.x + 1];
}

if (X | 0xfe == 0xfc) // bits 0 & 1 == 0
{
u_col_sum[threadIdx.x] += u_col_sum[threadIdx.x + 2];
v_col_sum[threadIdx.x] += v_col_sum[threadIdx.x + 2];
}
@ -110,7 +110,7 @@ namespace cv { namespace gpu { namespace device
v_avg(blockIdx.y, blockIdx.x) = v_col_sum[0];
}
}

void NeedleMapAverage_gpu(DevMem2Df u, DevMem2Df v, DevMem2Df u_avg, DevMem2Df v_avg)
{
const dim3 block(NEEDLE_MAP_SCALE);
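One line in the needle-map kernel above deserves an editorial flag: `if (X | 0xfe == 0xfc)`. In C and C++, == binds tighter than |, so the condition parses as X | (0xfe == 0xfc), i.e. X | 0, which simply tests whether X is nonzero rather than performing the "bits 0 & 1 == 0" test the comment describes. The commit carries the line through unchanged; the sketch below only demonstrates the parse and what the intended guard would look like, and is not a change made by this merge:

#include <cassert>

int main()
{
    unsigned X = 5;
    // == evaluates first: 0xfe == 0xfc is false (0), so the left side is X | 0.
    assert((X | 0xfe == 0xfc) == (X | (0xfe == 0xfc)));
    // A guard that really checks that bits 0 and 1 are clear:
    bool lowTwoBitsClear = (X & 0x3) == 0;
    return lowTwoBitsClear ? 0 : 1;
}

Modern compilers flag the original expression with -Wparentheses for exactly this reason.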
orb.cu

@ -40,7 +40,7 @@
//
// Copyright (c) 2010, Paul Furgale, Chi Hay Tong
//
// The original code was written by Paul Furgale and Chi Hay Tong
// and later optimized and prepared for integration into OpenCV by Itseez.
//
//M*/
@ -51,7 +51,7 @@
#include "opencv2/gpu/device/utility.hpp"
#include "opencv2/gpu/device/functional.hpp"

namespace cv { namespace gpu { namespace device
{
namespace orb
{
@ -59,7 +59,7 @@ namespace cv { namespace gpu { namespace device
// cull

int cull_gpu(int* loc, float* response, int size, int n_points)
{
thrust::device_ptr<int> loc_ptr(loc);
thrust::device_ptr<float> response_ptr(response);

@ -83,10 +83,10 @@ namespace cv { namespace gpu { namespace device
{
const short2 loc = loc_[ptidx];

const int r = blockSize / 2;
const int x0 = loc.x - r;
const int y0 = loc.y - r;

int a = 0, b = 0, c = 0;

for (int ind = threadIdx.x; ind < blockSize * blockSize; ind += blockDim.x)
@ -94,12 +94,12 @@ namespace cv { namespace gpu { namespace device
const int i = ind / blockSize;
const int j = ind % blockSize;

int Ix = (img(y0 + i, x0 + j + 1) - img(y0 + i, x0 + j - 1)) * 2 +
(img(y0 + i - 1, x0 + j + 1) - img(y0 + i - 1, x0 + j - 1)) +
(img(y0 + i + 1, x0 + j + 1) - img(y0 + i + 1, x0 + j - 1));

int Iy = (img(y0 + i + 1, x0 + j) - img(y0 + i - 1, x0 + j)) * 2 +
(img(y0 + i + 1, x0 + j - 1) - img(y0 + i - 1, x0 + j - 1)) +
(img(y0 + i + 1, x0 + j + 1) - img(y0 + i - 1, x0 + j + 1));

a += Ix * Ix;
@ -160,7 +160,7 @@ namespace cv { namespace gpu { namespace device
int m_01 = 0, m_10 = 0;

const short2 loc = loc_[ptidx];

// Treat the center line differently, v=0
for (int u = threadIdx.x - half_k; u <= half_k; u += blockDim.x)
m_10 += u * image(loc.y, loc.x + u);
@ -173,7 +173,7 @@ namespace cv { namespace gpu { namespace device
int v_sum = 0;
int m_sum = 0;
const int d = c_u_max[v];

for (int u = threadIdx.x - d; u <= d; u += blockDim.x)
{
int val_plus = image(loc.y + v, loc.x + u);
@ -229,7 +229,7 @@ namespace cv { namespace gpu { namespace device
{
__device__ static int calc(const PtrStepb& img, short2 loc, const int* pattern_x, const int* pattern_y, float sina, float cosa, int i)
{
pattern_x += 16 * i;
pattern_y += 16 * i;

int t0, t1, val;
@ -257,7 +257,7 @@ namespace cv { namespace gpu { namespace device

t0 = GET_VALUE(14); t1 = GET_VALUE(15);
val |= (t0 < t1) << 7;

return val;
}
};
@ -266,23 +266,23 @@ namespace cv { namespace gpu { namespace device
{
__device__ static int calc(const PtrStepb& img, short2 loc, const int* pattern_x, const int* pattern_y, float sina, float cosa, int i)
{
pattern_x += 12 * i;
pattern_y += 12 * i;

int t0, t1, t2, val;

t0 = GET_VALUE(0); t1 = GET_VALUE(1); t2 = GET_VALUE(2);
val = t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0);

t0 = GET_VALUE(3); t1 = GET_VALUE(4); t2 = GET_VALUE(5);
val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 2;

t0 = GET_VALUE(6); t1 = GET_VALUE(7); t2 = GET_VALUE(8);
val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 4;

t0 = GET_VALUE(9); t1 = GET_VALUE(10); t2 = GET_VALUE(11);
val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 6;

return val;
}
};
@ -291,9 +291,9 @@ namespace cv { namespace gpu { namespace device
{
__device__ static int calc(const PtrStepb& img, short2 loc, const int* pattern_x, const int* pattern_y, float sina, float cosa, int i)
{
pattern_x += 16 * i;
pattern_y += 16 * i;

int t0, t1, t2, t3, k, val;
int a, b;

@ -304,7 +304,7 @@ namespace cv { namespace gpu { namespace device
if( t3 > t2 ) t2 = t3, b = 3;
k = t0 > t2 ? a : b;
val = k;

t0 = GET_VALUE(4); t1 = GET_VALUE(5);
t2 = GET_VALUE(6); t3 = GET_VALUE(7);
a = 0, b = 2;
@ -312,7 +312,7 @@ namespace cv { namespace gpu { namespace device
if( t3 > t2 ) t2 = t3, b = 3;
k = t0 > t2 ? a : b;
val |= k << 2;

t0 = GET_VALUE(8); t1 = GET_VALUE(9);
t2 = GET_VALUE(10); t3 = GET_VALUE(11);
a = 0, b = 2;
@ -320,7 +320,7 @@ namespace cv { namespace gpu { namespace device
if( t3 > t2 ) t2 = t3, b = 3;
k = t0 > t2 ? a : b;
val |= k << 4;

t0 = GET_VALUE(12); t1 = GET_VALUE(13);
t2 = GET_VALUE(14); t3 = GET_VALUE(15);
a = 0, b = 2;
@ -328,7 +328,7 @@ namespace cv { namespace gpu { namespace device
if( t3 > t2 ) t2 = t3, b = 3;
k = t0 > t2 ? a : b;
val |= k << 6;

return val;
}
};
@ -399,7 +399,7 @@ namespace cv { namespace gpu { namespace device
y[ptidx] = loc.y * scale;
}
}

void mergeLocation_gpu(const short2* loc, float* x, float* y, int npoints, float scale, cudaStream_t stream)
{
dim3 block(256);
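The m_10/m_01 sums above implement the intensity-centroid orientation step of ORB: the first-order patch moments give a centroid offset, and atan2(m_01, m_10) is taken as the keypoint angle (the c_u_max table restricts the sum to a circular patch, and the strided loops parallelize it across a thread block). A square-patch CPU sketch under that reading, with hypothetical names, not the commit's code:

#include <cmath>

// Intensity-centroid angle over a (2*half_k+1)^2 patch centered at (cx, cy).
// img(y, x) is assumed to return a pixel intensity.
template <typename Img>
float icAngleSketch(const Img& img, int cx, int cy, int half_k)
{
    int m_01 = 0, m_10 = 0;
    for (int v = -half_k; v <= half_k; ++v)
        for (int u = -half_k; u <= half_k; ++u)
        {
            const int val = img(cy + v, cx + u);
            m_10 += u * val; // first moment in x
            m_01 += v * val; // first moment in y
        }
    return ::atan2f((float)m_01, (float)m_10);
}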
remap.cu

@ -69,7 +69,7 @@ namespace cv { namespace gpu { namespace device
{
static void call(DevMem2D_<T> src, DevMem2Df mapx, DevMem2Df mapy, DevMem2D_<T> dst, const float* borderValue, cudaStream_t stream, int)
{
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;

dim3 block(32, 8);
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
@ -159,7 +159,7 @@ namespace cv { namespace gpu { namespace device
cudaSafeCall( cudaDeviceSynchronize() ); \
} \
};

OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar)
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar2)
OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar4)
@ -188,7 +188,7 @@ namespace cv { namespace gpu { namespace device

template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcher
{
static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, DevMem2Df mapx, DevMem2Df mapy,
DevMem2D_<T> dst, const float* borderValue, cudaStream_t stream, int cc)
{
if (stream == 0)
@ -198,13 +198,13 @@ namespace cv { namespace gpu { namespace device
}
};

template <typename T> void remap_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap,
DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc)
{
typedef void (*caller_t)(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap,
DevMem2D_<T> dst, const float* borderValue, cudaStream_t stream, int cc);

static const caller_t callers[3][5] =
{
{
RemapDispatcher<PointFilter, BrdReflect101, T>::call,
@ -229,7 +229,7 @@ namespace cv { namespace gpu { namespace device
}
};

callers[interpolation][borderMode](static_cast< DevMem2D_<T> >(src), static_cast< DevMem2D_<T> >(srcWhole), xoff, yoff, xmap, ymap,
static_cast< DevMem2D_<T> >(dst), borderValue, stream, cc);
}
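remap_gpu's callers[3][5] array above is a plain function-pointer dispatch table: the interpolation mode indexes the row and the border mode the column, so one indexing expression replaces a nested switch over template instantiations. A stripped-down, self-contained sketch of the idiom (names hypothetical):

#include <cstdio>

typedef void (*caller_t)(int width);

void nearestReflect(int w) { std::printf("nearest/reflect, width %d\n", w); }
void nearestClamp(int w)   { std::printf("nearest/clamp, width %d\n", w); }
void linearReflect(int w)  { std::printf("linear/reflect, width %d\n", w); }
void linearClamp(int w)    { std::printf("linear/clamp, width %d\n", w); }

int main()
{
    // [interpolation][borderMode], mirroring callers[3][5] above.
    static const caller_t callers[2][2] =
    {
        { nearestReflect, nearestClamp },
        { linearReflect,  linearClamp  }
    };
    const int interpolation = 1, borderMode = 0;
    callers[interpolation][borderMode](640);
    return 0;
}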
resize.cu

@ -229,7 +229,7 @@ namespace cv { namespace gpu { namespace device
}
};

template <typename T> void resize_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy,
DevMem2Db dst, int interpolation, cudaStream_t stream)
{
typedef void (*caller_t)(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream);
@ -245,7 +245,7 @@ namespace cv { namespace gpu { namespace device
if (interpolation == 3 && (fx <= 1.f || fy <= 1.f))
interpolation = 1;

callers[interpolation](static_cast< DevMem2D_<T> >(src), static_cast< DevMem2D_<T> >(srcWhole), xoff, yoff, fx, fy,
static_cast< DevMem2D_<T> >(dst), stream);
}
row_filter.cu

@ -48,9 +48,9 @@
#include "opencv2/gpu/device/border_interpolate.hpp"
#include "opencv2/gpu/device/static_check.hpp"

namespace cv { namespace gpu { namespace device
{
namespace row_filter
{
#define MAX_KERNEL_SIZE 32

@ -79,7 +79,7 @@ namespace cv { namespace gpu { namespace device
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;

__shared__ sum_t smem[BLOCK_DIM_Y][(PATCH_PER_BLOCK + 2 * HALO_SIZE) * BLOCK_DIM_X];

const int y = blockIdx.y * BLOCK_DIM_Y + threadIdx.y;

if (y >= src.rows)
@ -161,7 +161,7 @@ namespace cv { namespace gpu { namespace device
{
typedef void (*caller_t)(DevMem2D_<T> src, DevMem2D_<D> dst, int anchor, int cc, cudaStream_t stream);

static const caller_t callers[5][33] =
{
{
0,
@ -337,9 +337,9 @@ namespace cv { namespace gpu { namespace device
linearRowFilter_caller<30, T, D, BrdRowWrap>,
linearRowFilter_caller<31, T, D, BrdRowWrap>,
linearRowFilter_caller<32, T, D, BrdRowWrap>
}
};

loadKernel(kernel, ksize);

callers[brd_type][ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor, cc, stream);
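The smem declaration above — BLOCK_DIM_Y rows of (PATCH_PER_BLOCK + 2 * HALO_SIZE) * BLOCK_DIM_X elements — is the staging buffer for a separable row filter: each block loads its tile plus a halo on both sides so every thread can read all of its kernel taps from shared memory. A one-row CUDA sketch with assumed constants (PATCH_PER_BLOCK collapsed to 1); an illustration of the halo idea, not the commit's kernel:

#define SKETCH_BLOCK_X 32
#define SKETCH_HALO 1

__global__ void rowFilterSketch(const float* src, float* dst, int cols,
                                const float* kernel, int ksize)
{
    __shared__ float smem[(1 + 2 * SKETCH_HALO) * SKETCH_BLOCK_X];

    const int x = blockIdx.x * SKETCH_BLOCK_X + threadIdx.x;

    // Center tile plus left and right halos, clamped at the image borders.
    smem[SKETCH_HALO * SKETCH_BLOCK_X + threadIdx.x] = src[::min(::max(x, 0), cols - 1)];
    smem[threadIdx.x] = src[::max(x - SKETCH_BLOCK_X, 0)];
    smem[(1 + SKETCH_HALO) * SKETCH_BLOCK_X + threadIdx.x] = src[::min(x + SKETCH_BLOCK_X, cols - 1)];
    __syncthreads();

    if (x < cols)
    {
        float sum = 0.f;
        const int anchor = ksize / 2;
        for (int k = 0; k < ksize; ++k)
            sum += smem[SKETCH_HALO * SKETCH_BLOCK_X + threadIdx.x - anchor + k] * kernel[k];
        dst[x] = sum;
    }
}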
safe_call.hpp

@ -60,7 +60,7 @@
#define cublasSafeCall(expr) ___cublasSafeCall(expr, __FILE__, __LINE__)
#endif

namespace cv { namespace gpu
{
void nppError(int err, const char *file, const int line, const char *func = "");
void ncvError(int err, const char *file, const int line, const char *func = "");
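The cublasSafeCall definition above is the same wrapper pattern as the cudaSafeCall used throughout the .cu files: a macro, rather than a function, so that __FILE__ and __LINE__ expand at the call site and the error report names the real caller. A minimal self-contained sketch of the pattern (the error handler is hypothetical):

#include <cstdio>
#include <cuda_runtime.h>

static void reportCudaError(cudaError_t err, const char* file, int line)
{
    if (err != cudaSuccess)
        std::fprintf(stderr, "CUDA error '%s' at %s:%d\n",
                     cudaGetErrorString(err), file, line);
}

// Expanded at the call site, so file/line point at the failing call.
#define sketchSafeCall(expr) reportCudaError((expr), __FILE__, __LINE__)

Used as sketchSafeCall( cudaDeviceSynchronize() ); — the same shape as the cudaSafeCall( ... ) invocations in the hunks above.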
split_merge.cu

@ -42,12 +42,12 @@

#include "internal_shared.hpp"

namespace cv { namespace gpu { namespace device
{
namespace split_merge
{
template <typename T, size_t elem_size = sizeof(T)>
struct TypeTraits
{
typedef T type;
typedef T type2;
@ -74,7 +74,7 @@ namespace cv { namespace gpu { namespace device
};

template <typename T>
struct TypeTraits<T, 4>
{
typedef int type;
typedef int2 type2;
@ -83,7 +83,7 @@ namespace cv { namespace gpu { namespace device
};

template <typename T>
struct TypeTraits<T, 8>
{
typedef double type;
typedef double2 type2;
@ -95,11 +95,11 @@ namespace cv { namespace gpu { namespace device
typedef void (*SplitFunction)(const DevMem2Db& src, DevMem2Db* dst, const cudaStream_t& stream);

//------------------------------------------------------------
// Merge

template <typename T>
__global__ void mergeC2_(const uchar* src0, size_t src0_step,
const uchar* src1, size_t src1_step,
int rows, int cols, uchar* dst, size_t dst_step)
{
typedef typename TypeTraits<T>::type2 dst_type;
@ -111,8 +111,8 @@ namespace cv { namespace gpu { namespace device
const T* src1_y = (const T*)(src1 + y * src1_step);
dst_type* dst_y = (dst_type*)(dst + y * dst_step);

if (x < cols && y < rows)
{
dst_type dst_elem;
dst_elem.x = src0_y[x];
dst_elem.y = src1_y[x];
@ -122,9 +122,9 @@ namespace cv { namespace gpu { namespace device

template <typename T>
__global__ void mergeC3_(const uchar* src0, size_t src0_step,
const uchar* src1, size_t src1_step,
const uchar* src2, size_t src2_step,
int rows, int cols, uchar* dst, size_t dst_step)
{
typedef typename TypeTraits<T>::type3 dst_type;
@ -137,8 +137,8 @@ namespace cv { namespace gpu { namespace device
const T* src2_y = (const T*)(src2 + y * src2_step);
dst_type* dst_y = (dst_type*)(dst + y * dst_step);

if (x < cols && y < rows)
{
dst_type dst_elem;
dst_elem.x = src0_y[x];
dst_elem.y = src1_y[x];
@ -149,9 +149,9 @@ namespace cv { namespace gpu { namespace device

template <>
__global__ void mergeC3_<double>(const uchar* src0, size_t src0_step,
const uchar* src1, size_t src1_step,
const uchar* src2, size_t src2_step,
int rows, int cols, uchar* dst, size_t dst_step)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
@ -162,8 +162,8 @@ namespace cv { namespace gpu { namespace device
const double* src2_y = (const double*)(src2 + y * src2_step);
double* dst_y = (double*)(dst + y * dst_step);

if (x < cols && y < rows)
{
dst_y[3 * x] = src0_y[x];
dst_y[3 * x + 1] = src1_y[x];
dst_y[3 * x + 2] = src2_y[x];
@ -172,10 +172,10 @@ namespace cv { namespace gpu { namespace device

template <typename T>
__global__ void mergeC4_(const uchar* src0, size_t src0_step,
const uchar* src1, size_t src1_step,
const uchar* src2, size_t src2_step,
const uchar* src3, size_t src3_step,
int rows, int cols, uchar* dst, size_t dst_step)
{
typedef typename TypeTraits<T>::type4 dst_type;
@ -189,8 +189,8 @@ namespace cv { namespace gpu { namespace device
const T* src3_y = (const T*)(src3 + y * src3_step);
dst_type* dst_y = (dst_type*)(dst + y * dst_step);

if (x < cols && y < rows)
{
dst_type dst_elem;
dst_elem.x = src0_y[x];
dst_elem.y = src1_y[x];
@ -202,10 +202,10 @@ namespace cv { namespace gpu { namespace device

template <>
__global__ void mergeC4_<double>(const uchar* src0, size_t src0_step,
const uchar* src1, size_t src1_step,
const uchar* src2, size_t src2_step,
const uchar* src3, size_t src3_step,
int rows, int cols, uchar* dst, size_t dst_step)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
@ -217,8 +217,8 @@ namespace cv { namespace gpu { namespace device
const double* src3_y = (const double*)(src3 + y * src3_step);
double2* dst_y = (double2*)(dst + y * dst_step);

if (x < cols && y < rows)
{
dst_y[2 * x] = make_double2(src0_y[x], src1_y[x]);
dst_y[2 * x + 1] = make_double2(src2_y[x], src3_y[x]);
}
@ -303,7 +303,7 @@ namespace cv { namespace gpu { namespace device

template <typename T>
__global__ void splitC2_(const uchar* src, size_t src_step,
int rows, int cols,
uchar* dst0, size_t dst0_step,
uchar* dst1, size_t dst1_step)
@ -317,7 +317,7 @@ namespace cv { namespace gpu { namespace device
T* dst0_y = (T*)(dst0 + y * dst0_step);
T* dst1_y = (T*)(dst1 + y * dst1_step);

if (x < cols && y < rows)
{
src_type src_elem = src_y[x];
dst0_y[x] = src_elem.x;
@ -327,7 +327,7 @@ namespace cv { namespace gpu { namespace device

template <typename T>
__global__ void splitC3_(const uchar* src, size_t src_step,
int rows, int cols,
uchar* dst0, size_t dst0_step,
uchar* dst1, size_t dst1_step,
@ -343,7 +343,7 @@ namespace cv { namespace gpu { namespace device
T* dst1_y = (T*)(dst1 + y * dst1_step);
T* dst2_y = (T*)(dst2 + y * dst2_step);

if (x < cols && y < rows)
{
src_type src_elem = src_y[x];
dst0_y[x] = src_elem.x;
@ -368,7 +368,7 @@ namespace cv { namespace gpu { namespace device
double* dst1_y = (double*)(dst1 + y * dst1_step);
double* dst2_y = (double*)(dst2 + y * dst2_step);

if (x < cols && y < rows)
{
dst0_y[x] = src_y[3 * x];
dst1_y[x] = src_y[3 * x + 1];
@ -395,7 +395,7 @@ namespace cv { namespace gpu { namespace device
T* dst2_y = (T*)(dst2 + y * dst2_step);
T* dst3_y = (T*)(dst3 + y * dst3_step);

if (x < cols && y < rows)
{
src_type src_elem = src_y[x];
dst0_y[x] = src_elem.x;
@ -423,7 +423,7 @@ namespace cv { namespace gpu { namespace device
double* dst2_y = (double*)(dst2 + y * dst2_step);
double* dst3_y = (double*)(dst3 + y * dst3_step);

if (x < cols && y < rows)
{
double2 src_elem1 = src_y[2 * x];
double2 src_elem2 = src_y[2 * x + 1];
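mergeC2_/C3_/C4_ above pack per-channel planes into interleaved pixels, and the <double> specializations write scalar or double2 lanes because no native double3/double4 vector store exists. The per-thread body reduces to the loop below; a CPU sketch of the two-channel case, not the commit's code:

// Interleave two planar rows into one packed 2-channel row,
// the scalar analogue of mergeC2_'s per-thread write of dst_elem.
template <typename T>
void mergeRowC2(const T* src0, const T* src1, T* dst, int cols)
{
    for (int x = 0; x < cols; ++x)
    {
        dst[2 * x]     = src0[x]; // channel 0 -> .x
        dst[2 * x + 1] = src1[x]; // channel 1 -> .y
    }
}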
stereobm.cu

@ -42,9 +42,9 @@

#include "internal_shared.hpp"

namespace cv { namespace gpu { namespace device
{
namespace stereobm
{
//////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////// Stereo BM ////////////////////////////////////////////////
@ -70,7 +70,7 @@ namespace cv { namespace gpu { namespace device

template<int RADIUS>
__device__ unsigned int CalcSSD(volatile unsigned int *col_ssd_cache, volatile unsigned int *col_ssd)
{
unsigned int cache = 0;
unsigned int cache2 = 0;

@ -401,8 +401,8 @@ namespace cv { namespace gpu { namespace device
prefilter_kernel<<<grid, threads, 0, stream>>>(output, prefilterCap);
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );

cudaSafeCall( cudaUnbindTexture (texForSobel ) );
}
stereobp.cu

@ -44,9 +44,9 @@
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/limits.hpp"

namespace cv { namespace gpu { namespace device
{
namespace stereobp
{
///////////////////////////////////////////////////////////////
/////////////////////// load constants ////////////////////////
@ -44,9 +44,9 @@
|
||||
#include "opencv2/gpu/device/saturate_cast.hpp"
|
||||
#include "opencv2/gpu/device/limits.hpp"
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
namespace stereocsbp
|
||||
namespace stereocsbp
|
||||
{
|
||||
///////////////////////////////////////////////////////////////
|
||||
/////////////////////// load constants ////////////////////////
|
||||
@ -62,7 +62,7 @@ namespace cv { namespace gpu { namespace device
|
||||
__constant__ int cth;
|
||||
|
||||
__constant__ size_t cimg_step;
|
||||
__constant__ size_t cmsg_step;
|
||||
__constant__ size_t cmsg_step;
|
||||
__constant__ size_t cdisp_step1;
|
||||
__constant__ size_t cdisp_step2;
|
||||
|
||||
@ -392,7 +392,7 @@ namespace cv { namespace gpu { namespace device
|
||||
get_first_k_initial_local<<<grid, threads, 0, stream>>> (data_cost_selected, disp_selected_pyr, h, w, nr_plane);
|
||||
else
|
||||
get_first_k_initial_global<<<grid, threads, 0, stream>>>(data_cost_selected, disp_selected_pyr, h, w, nr_plane);
|
||||
|
||||
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
@ -575,7 +575,7 @@ namespace cv { namespace gpu { namespace device
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step1, sizeof(size_t)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step2, &disp_step2, sizeof(size_t)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) );
|
||||
|
||||
|
||||
callers[level](disp_selected_pyr, data_cost, rows, cols, h, w, level, nr_plane, channels, stream);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
@ -588,13 +588,13 @@ namespace cv { namespace gpu { namespace device
|
||||
|
||||
template void compute_data_cost(const float* disp_selected_pyr, float* data_cost, size_t msg_step,
|
||||
int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream);
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
//////////////////////// init message /////////////////////////
|
||||
///////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
|
||||
template <typename T>
|
||||
__device__ void get_first_k_element_increase(T* u_new, T* d_new, T* l_new, T* r_new,
|
||||
const T* u_cur, const T* d_cur, const T* l_cur, const T* r_cur,
|
||||
@ -691,7 +691,7 @@ namespace cv { namespace gpu { namespace device
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step1, sizeof(size_t)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step2, &disp_step2, sizeof(size_t)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) );
|
||||
|
||||
|
||||
dim3 threads(32, 8, 1);
|
||||
dim3 grid(1, 1, 1);
|
||||
|
||||
@ -720,7 +720,7 @@ namespace cv { namespace gpu { namespace device
|
||||
const float* u_cur, const float* d_cur, const float* l_cur, const float* r_cur,
|
||||
float* selected_disp_pyr_new, const float* selected_disp_pyr_cur,
|
||||
float* data_cost_selected, const float* data_cost, size_t msg_step,
|
||||
int h, int w, int nr_plane, int h2, int w2, int nr_plane2, cudaStream_t stream);
|
||||
int h, int w, int nr_plane, int h2, int w2, int nr_plane2, cudaStream_t stream);
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
//////////////////// calc all iterations /////////////////////
|
||||
@ -805,7 +805,7 @@ namespace cv { namespace gpu { namespace device
|
||||
for(int t = 0; t < iters; ++t)
|
||||
{
|
||||
compute_message<<<grid, threads, 0, stream>>>(u, d, l, r, data_cost_selected, selected_disp_pyr_cur, h, w, nr_plane, t & 1);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
}
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
@@ -814,7 +814,7 @@ namespace cv { namespace gpu { namespace device
    template void calc_all_iterations(short* u, short* d, short* l, short* r, const short* data_cost_selected, const short* selected_disp_pyr_cur, size_t msg_step,
        int h, int w, int nr_plane, int iters, cudaStream_t stream);

    template void calc_all_iterations(float* u, float* d, float* l, float* r, const float* data_cost_selected, const float* selected_disp_pyr_cur, size_t msg_step,
        int h, int w, int nr_plane, int iters, cudaStream_t stream);
@@ -879,7 +879,7 @@ namespace cv { namespace gpu { namespace device
        cudaSafeCall( cudaDeviceSynchronize() );
    }

    template void compute_disp(const short* u, const short* d, const short* l, const short* r, const short* data_cost_selected, const short* disp_selected, size_t msg_step,
        const DevMem2D_<short>& disp, int nr_plane, cudaStream_t stream);

    template void compute_disp(const float* u, const float* d, const float* l, const float* r, const float* data_cost_selected, const float* disp_selected, size_t msg_step,
@@ -98,7 +98,7 @@ namespace cv { namespace gpu { namespace device
    {
        dim3 block(32, 8);
        dim3 grid(divUp(xmap.cols, block.x), divUp(xmap.rows, block.y));

        buildWarpMaps<Transform><<<grid, block, 0, stream>>>(xmap, ymap);
        cudaSafeCall( cudaGetLastError() );
@@ -158,7 +158,7 @@ namespace cv { namespace gpu { namespace device
    {
        static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, DevMem2D_<T> dst, const float* borderValue, int)
        {
            typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;

            dim3 block(32, 8);
            dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
@@ -256,7 +256,7 @@ namespace cv { namespace gpu { namespace device
    #undef OPENCV_GPU_IMPLEMENT_WARP_TEX

    template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcher
    {
        static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, DevMem2D_<T> dst, const float* borderValue, cudaStream_t stream, int cc)
        {
            if (stream == 0)
@@ -266,7 +266,7 @@ namespace cv { namespace gpu { namespace device
        }
    };

    template <class Transform, typename T>
    void warp_caller(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Db dst, int interpolation,
        int borderMode, const float* borderValue, cudaStream_t stream, int cc)
    {
@@ -98,17 +98,17 @@ namespace cv { namespace gpu { namespace device

 using namespace ::cv::gpu::device;

-cv::gpu::HOGDescriptor::HOGDescriptor(Size win_size, Size block_size, Size block_stride, Size cell_size,
-                                      int nbins, double win_sigma, double threshold_L2hys, bool gamma_correction, int nlevels)
-    : win_size(win_size),
-      block_size(block_size),
-      block_stride(block_stride),
-      cell_size(cell_size),
-      nbins(nbins),
-      win_sigma(win_sigma),
-      threshold_L2hys(threshold_L2hys),
-      gamma_correction(gamma_correction),
-      nlevels(nlevels)
+cv::gpu::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size block_stride_, Size cell_size_,
+                                      int nbins_, double win_sigma_, double threshold_L2hys_, bool gamma_correction_, int nlevels_)
+    : win_size(win_size_),
+      block_size(block_size_),
+      block_stride(block_stride_),
+      cell_size(cell_size_),
+      nbins(nbins_),
+      win_sigma(win_sigma_),
+      threshold_L2hys(threshold_L2hys_),
+      gamma_correction(gamma_correction_),
+      nlevels(nlevels_)
 {
     CV_Assert((win_size.width - block_size.width ) % block_stride.width == 0 &&
               (win_size.height - block_size.height) % block_stride.height == 0);
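(The trailing-underscore renames in this hunk, and in the hunks below, stop constructor parameters from shadowing the members they initialize, which silences GCC's -Wshadow. A minimal illustration with a hypothetical class, not OpenCV code:)

struct Desc
{
    int nbins;

    // Before: a parameter also named `nbins` shadowed the member inside the
    // body; `nbins(nbins)` still initialized correctly, but -Wshadow warned.
    explicit Desc(int nbins_) : nbins(nbins_) {}
};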
@@ -149,9 +149,9 @@ bool cv::gpu::HOGDescriptor::checkDetectorSize() const
     return detector_size == 0 || detector_size == descriptor_size || detector_size == descriptor_size + 1;
 }

-void cv::gpu::HOGDescriptor::setSVMDetector(const vector<float>& detector)
+void cv::gpu::HOGDescriptor::setSVMDetector(const vector<float>& _detector)
 {
-    std::vector<float> detector_reordered(detector.size());
+    std::vector<float> detector_reordered(_detector.size());

     size_t block_hist_size = getBlockHistogramSize();
     cv::Size blocks_per_img = numPartsWithin(win_size, block_size, block_stride);
@@ -159,7 +159,7 @@ void cv::gpu::HOGDescriptor::setSVMDetector(const vector<float>& detector)
     for (int i = 0; i < blocks_per_img.height; ++i)
         for (int j = 0; j < blocks_per_img.width; ++j)
         {
-            const float* src = &detector[0] + (j * blocks_per_img.height + i) * block_hist_size;
+            const float* src = &_detector[0] + (j * blocks_per_img.height + i) * block_hist_size;
             float* dst = &detector_reordered[0] + (i * blocks_per_img.width + j) * block_hist_size;
             for (size_t k = 0; k < block_hist_size; ++k)
                 dst[k] = src[k];
@@ -168,7 +168,7 @@ void cv::gpu::HOGDescriptor::setSVMDetector(const vector<float>& detector)
     this->detector.upload(Mat(detector_reordered).reshape(1, 1));

     size_t descriptor_size = getDescriptorSize();
-    free_coef = detector.size() > descriptor_size ? detector[descriptor_size] : 0;
+    free_coef = _detector.size() > descriptor_size ? _detector[descriptor_size] : 0;

     CV_Assert(checkDetectorSize());
 }
@@ -190,24 +190,24 @@ cv::gpu::GpuMat cv::gpu::HOGDescriptor::getBuffer(int rows, int cols, int type,
 }


-void cv::gpu::HOGDescriptor::computeGradient(const GpuMat& img, GpuMat& grad, GpuMat& qangle)
+void cv::gpu::HOGDescriptor::computeGradient(const GpuMat& img, GpuMat& _grad, GpuMat& _qangle)
 {
     CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);

     // grad.create(img.size(), CV_32FC2);
-    grad = getBuffer(img.size(), CV_32FC2, grad_buf);
+    _grad = getBuffer(img.size(), CV_32FC2, grad_buf);

     // qangle.create(img.size(), CV_8UC2);
-    qangle = getBuffer(img.size(), CV_8UC2, qangle_buf);
+    _qangle = getBuffer(img.size(), CV_8UC2, qangle_buf);

     float angleScale = (float)(nbins / CV_PI);
     switch (img.type())
     {
     case CV_8UC1:
-        hog::compute_gradients_8UC1(nbins, img.rows, img.cols, img, angleScale, grad, qangle, gamma_correction);
+        hog::compute_gradients_8UC1(nbins, img.rows, img.cols, img, angleScale, _grad, _qangle, gamma_correction);
         break;
     case CV_8UC4:
-        hog::compute_gradients_8UC4(nbins, img.rows, img.cols, img, angleScale, grad, qangle, gamma_correction);
+        hog::compute_gradients_8UC4(nbins, img.rows, img.cols, img, angleScale, _grad, _qangle, gamma_correction);
         break;
     }
 }
@@ -323,8 +323,8 @@ void cv::gpu::HOGDescriptor::detectMultiScale(const GpuMat& img, vector<Rect>& f

     for (size_t i = 0; i < level_scale.size(); i++)
     {
-        double scale = level_scale[i];
-        Size sz(cvRound(img.cols / scale), cvRound(img.rows / scale));
+        double _scale = level_scale[i];
+        Size sz(cvRound(img.cols / _scale), cvRound(img.rows / _scale));
         GpuMat smaller_img;

         if (sz == img.size())
@@ -78,7 +78,7 @@ template <typename T>
 struct GraphEdge
 {
     GraphEdge() {}
-    GraphEdge(int to, int next, const T& val) : to(to), next(next), val(val) {}
+    GraphEdge(int to_, int next_, const T& val_) : to(to_), next(next_), val(val_) {}
     int to;
     int next;
     T val;
@@ -110,7 +110,7 @@ private:
 struct SegmLinkVal
 {
     SegmLinkVal() {}
-    SegmLinkVal(int dr, int dsp) : dr(dr), dsp(dsp) {}
+    SegmLinkVal(int dr_, int dsp_) : dr(dr_), dsp(dsp_) {}
     bool operator <(const SegmLinkVal& other) const
     {
         return dr + dsp < other.dr + other.dsp;
@@ -123,8 +123,8 @@ struct SegmLinkVal
 struct SegmLink
 {
     SegmLink() {}
-    SegmLink(int from, int to, const SegmLinkVal& val)
-        : from(from), to(to), val(val) {}
+    SegmLink(int from_, int to_, const SegmLinkVal& val_)
+        : from(from_), to(to_), val(val_) {}
     bool operator <(const SegmLink& other) const
     {
         return val < other.val;
@@ -182,10 +182,10 @@ inline int DjSets::merge(int set1, int set2)


 template <typename T>
-Graph<T>::Graph(int numv, int nume_max) : start(numv, -1), edges(nume_max)
+Graph<T>::Graph(int numv_, int nume_max_) : start(numv_, -1), edges(nume_max_)
 {
-    this->numv = numv;
-    this->nume_max = nume_max;
+    this->numv = numv_;
+    this->nume_max = nume_max_;
     nume = 0;
 }
@@ -44,25 +44,25 @@
#ifndef OPENCV_GPU_WARP_REDUCE_HPP__
#define OPENCV_GPU_WARP_REDUCE_HPP__

namespace cv { namespace gpu { namespace device
{
    template <class T>
    __device__ __forceinline__ T warp_reduce(volatile T *ptr, const unsigned int tid = threadIdx.x)
    {
        const unsigned int lane = tid & 31; // index of thread in warp (0..31)

        if (lane < 16)
        {
            T partial = ptr[tid];

            ptr[tid] = partial = partial + ptr[tid + 16];
            ptr[tid] = partial = partial + ptr[tid +  8];
            ptr[tid] = partial = partial + ptr[tid +  4];
            ptr[tid] = partial = partial + ptr[tid +  2];
            ptr[tid] = partial = partial + ptr[tid +  1];
        }

        return ptr[tid - lane];
    }
}}} // namespace cv { namespace gpu { namespace device {
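(For orientation: warp_reduce does a shared-memory tree reduction within one 32-thread warp. Threads 0 to 15 fold the upper half of the warp's 32 slots onto the lower half in strides of 16, 8, 4, 2, 1, and every lane then reads the warp's base element. A minimal usage sketch, a hypothetical kernel not part of this commit, assuming the header above is on the include path and a block size of 256:)

#include "opencv2/gpu/device/warp_reduce.hpp"

// Hypothetical kernel: each 32-thread warp sums its 32 staged inputs.
__global__ void sum_per_warp(const float* in, float* out, int n)
{
    __shared__ volatile float smem[256];   // one slot per thread in the block

    const unsigned int tid = threadIdx.x;
    const int gid = blockIdx.x * blockDim.x + tid;
    smem[tid] = (gid < n) ? in[gid] : 0.f;

    // Every lane gets the warp's total back; no __syncthreads is needed
    // because all traffic stays inside one warp (warp-synchronous execution
    // on the pre-Volta GPUs this code targets) and smem is volatile.
    float total = cv::gpu::device::warp_reduce(smem, tid);

    if ((tid & 31) == 0) // lane 0 writes one result per warp
        out[blockIdx.x * (blockDim.x / 32) + tid / 32] = total;
}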
@@ -171,8 +171,8 @@ static void csbp_operator(StereoConstantSpaceBP& rthis, GpuMat& mbuf, GpuMat& te
    {
        cols_pyr[i]     = cols_pyr[i-1] / 2;
        rows_pyr[i]     = rows_pyr[i-1] / 2;
        nr_plane_pyr[i] = nr_plane_pyr[i-1] * 2;
    }


    GpuMat u[2], d[2], l[2], r[2], disp_selected_pyr[2], data_cost, data_cost_selected;
@@ -193,14 +193,14 @@ static void csbp_operator(StereoConstantSpaceBP& rthis, GpuMat& mbuf, GpuMat& te
     GpuMat sub2 = sub1.rowRange((k+0)*sub1.rows/2, (k+1)*sub1.rows/2);

     GpuMat *buf_ptrs[] = { &u[k], &d[k], &l[k], &r[k], &disp_selected_pyr[k] };
-    for(int r = 0; r < 5; ++r)
+    for(int _r = 0; _r < 5; ++_r)
     {
-        *buf_ptrs[r] = sub2.rowRange(r * sub2.rows/5, (r+1) * sub2.rows/5);
-        assert(buf_ptrs[r]->cols == cols && buf_ptrs[r]->rows == rows * rthis.nr_plane);
+        *buf_ptrs[_r] = sub2.rowRange(_r * sub2.rows/5, (_r+1) * sub2.rows/5);
+        assert(buf_ptrs[_r]->cols == cols && buf_ptrs[_r]->rows == rows * rthis.nr_plane);
     }
 };

 size_t elem_step = mbuf.step / sizeof(T);

 Size temp_size = data_cost.size();
 if ((size_t)temp_size.area() < elem_step * rows_pyr[levels - 1] * rthis.ndisp)
@@ -198,7 +198,7 @@ Trains an SVM with optimal parameters.

 .. ocv:pyfunction:: cv2.SVM.train_auto(trainData, responses, varIdx, sampleIdx, params[, k_fold[, Cgrid[, gammaGrid[, pGrid[, nuGrid[, coeffGrid[, degreeGrid[, balanced]]]]]]]]) -> retval

-:param k_fold: Cross-validation parameter. The training set is divided into ``k_fold`` subsets. One subset is used to train the model, the others form the test set. So, the SVM algorithm is executed ``k_fold`` times.
+:param k_fold: Cross-validation parameter. The training set is divided into ``k_fold`` subsets. One subset is used to test the model, the others form the train set. So, the SVM algorithm is executed ``k_fold`` times.

 :param \*Grid: Iteration grid for the corresponding SVM parameter.
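(For reference, a minimal C++ sketch of the corresponding CvSVM::train_auto call, with toy data and all parameter grids left at their defaults, assuming the 2.4-era ml API:)

#include <opencv2/core/core.hpp>
#include <opencv2/ml/ml.hpp>

int main()
{
    // Toy 2-class problem: 20 noisy points around (0,0) and (1,1).
    cv::Mat trainData(20, 2, CV_32F), responses(20, 1, CV_32F);
    cv::RNG rng(0);
    for (int i = 0; i < 20; ++i)
    {
        float cls  = (i < 10) ? -1.f : 1.f;
        float base = (cls < 0) ? 0.f : 1.f;
        trainData.at<float>(i, 0) = base + rng.uniform(-0.2f, 0.2f);
        trainData.at<float>(i, 1) = base + rng.uniform(-0.2f, 0.2f);
        responses.at<float>(i, 0) = cls;
    }

    CvSVMParams params;
    params.svm_type    = CvSVM::C_SVC;
    params.kernel_type = CvSVM::RBF;

    CvSVM svm;
    // k_fold = 5: each subset is held out once as the test set while the
    // other four train the model, as the corrected text above describes.
    svm.train_auto(trainData, responses, cv::Mat(), cv::Mat(), params, 5);
    return 0;
}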
@@ -1,8 +1,8 @@
-android update project --target android-11 --library ../../OpenCV-2.4.0/ --name "Sample - 15-puzzle" --path ./15-puzzle
-android update project --target android-11 --library ../../OpenCV-2.4.0/ --name "Sample - face-detection" --path ./face-detection
-android update project --target android-11 --library ../../OpenCV-2.4.0/ --name "Sample - image-manipulations" --path ./image-manipulations
-android update project --target android-11 --name "Tutorial 0 (Basic) - Android Camera" --path ./tutorial-0-androidcamera
-android update project --target android-11 --library ../../OpenCV-2.4.0/ --name "Tutorial 1 (Basic) - Add OpenCV" --path ./tutorial-1-addopencv
-android update project --target android-11 --library ../../OpenCV-2.4.0/ --name "Tutorial 2 (Basic) - Use OpenCV Camera" --path ./tutorial-2-opencvcamera
-android update project --target android-11 --name "Tutorial 3 (Advanced) - Add Native OpenCV" --path ./tutorial-3-native
-android update project --target android-11 --library ../../OpenCV-2.4.0/ --name "Tutorial 4 (Advanced) - Mix Java+Native OpenCV" --path ./tutorial-4-mixed
+call android update project --target android-11 --library ../../OpenCV-2.4.1/ --name "Sample - 15-puzzle" --path ./15-puzzle
+call android update project --target android-11 --library ../../OpenCV-2.4.1/ --name "Sample - face-detection" --path ./face-detection
+call android update project --target android-11 --library ../../OpenCV-2.4.1/ --name "Sample - image-manipulations" --path ./image-manipulations
+call android update project --target android-11 --name "Tutorial 0 (Basic) - Android Camera" --path ./tutorial-0-androidcamera
+call android update project --target android-11 --library ../../OpenCV-2.4.1/ --name "Tutorial 1 (Basic) - Add OpenCV" --path ./tutorial-1-addopencv
+call android update project --target android-11 --library ../../OpenCV-2.4.1/ --name "Tutorial 2 (Basic) - Use OpenCV Camera" --path ./tutorial-2-opencvcamera
+call android update project --target android-11 --name "Tutorial 3 (Advanced) - Add Native OpenCV" --path ./tutorial-3-native
+call android update project --target android-11 --library ../../OpenCV-2.4.1/ --name "Tutorial 4 (Advanced) - Mix Java+Native OpenCV" --path ./tutorial-4-mixed
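(The `call` prefix matters because on Windows the `android` tool is itself a batch file: invoking one batch file from another without `call` transfers control permanently, so only the first `android update project` line would ever run. With `call`, control returns to the script and all eight projects get updated. The library path is also bumped from OpenCV-2.4.0 to OpenCV-2.4.1.)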
@@ -146,7 +146,7 @@ int main(int argc, char** argv)
     minGradMagnitudes[3] = 1;

     const float minDepth = 0.f;        //in meters
-    const float maxDepth = 3.f;        //in meters
+    const float maxDepth = 4.f;        //in meters
     const float maxDepthDiff = 0.07f;  //in meters

     tm.start();
44
samples/python2/kmeans.py
Normal file
@@ -0,0 +1,44 @@
'''
K-means clustering sample.
Usage:
   kmeans.py

Keyboard shortcuts:
   ESC   - exit
   space - generate new distribution
'''

import numpy as np
import cv2

from gaussian_mix import make_gaussians

if __name__ == '__main__':
    cluster_n = 5
    img_size = 512

    print __doc__

    # generating bright palette
    colors = np.zeros((1, cluster_n, 3), np.uint8)
    colors[0,:] = 255
    colors[0,:,0] = np.arange(0, 180, 180.0/cluster_n)
    colors = cv2.cvtColor(colors, cv2.COLOR_HSV2BGR)[0]

    while True:
        print 'sampling distributions...'
        points, _ = make_gaussians(cluster_n, img_size)

        term_crit = (cv2.TERM_CRITERIA_EPS, 30, 0.1)
        ret, labels, centers = cv2.kmeans(points, cluster_n, term_crit, 10, 0)

        img = np.zeros((img_size, img_size, 3), np.uint8)
        for (x, y), label in zip(np.int32(points), labels.ravel()):
            c = map(int, colors[label])
            cv2.circle(img, (x, y), 1, c, -1)

        cv2.imshow('gaussian mixture', img)
        ch = 0xFF & cv2.waitKey(0)
        if ch == 27:
            break
    cv2.destroyAllWindows()
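(Note that the sample imports `make_gaussians` from `gaussian_mix.py` in the same samples/python2 directory, so it should be launched from there with a Python 2 interpreter, e.g. `python kmeans.py`; per the docstring, space generates a new point distribution and ESC quits.)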