Merge remote-tracking branch 'origin/master'

This commit is contained in:
Vsevolod Glumov 2012-08-23 14:58:41 +04:00
commit 5648e49d59
169 changed files with 14121 additions and 9349 deletions

View File

@ -89,7 +89,7 @@ endif(WIN32)
ocv_warnings_disable(CMAKE_C_FLAGS -Wno-unused-but-set-variable -Wmissing-prototypes -Wmissing-declarations -Wundef -Wunused -Wsign-compare
-Wcast-align -Wshadow -Wno-maybe-uninitialized -Wno-pointer-to-int-cast -Wno-int-to-pointer-cast)
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wmissing-declarations -Wunused-parameter /wd4100 /wd4244 /wd4706 /wd4127 /wd4701 /wd4018 /wd4267 /wd4306 /wd4305 /wd4312 /wd4311)
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wmissing-declarations -Wunused-parameter /wd4100 /wd4244 /wd4706 /wd4127 /wd4701 /wd4018 /wd4267 /wd4306 /wd4305 /wd4312 /wd4311 /wd4703)
if(UNIX AND (CMAKE_COMPILER_IS_GNUCXX OR CV_ICC))
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC")

View File

@ -189,11 +189,11 @@ OCV_OPTION(ENABLE_PROFILING "Enable profiling in the GCC compiler (Add
OCV_OPTION(ENABLE_OMIT_FRAME_POINTER "Enable -fomit-frame-pointer for GCC" ON IF CMAKE_COMPILER_IS_GNUCXX )
OCV_OPTION(ENABLE_POWERPC "Enable PowerPC for GCC" ON IF (CMAKE_COMPILER_IS_GNUCXX AND CMAKE_SYSTEM_PROCESSOR MATCHES powerpc.*) )
OCV_OPTION(ENABLE_FAST_MATH "Enable -ffast-math (not recommended for GCC 4.6.x)" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_SSE "Enable SSE instructions" ON IF (MSVC OR CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_SSE2 "Enable SSE2 instructions" ON IF (MSVC OR CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_SSE3 "Enable SSE3 instructions" OFF IF (CV_ICC OR CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_SSE "Enable SSE instructions" ON IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_SSE2 "Enable SSE2 instructions" ON IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_SSE3 "Enable SSE3 instructions" ON IF ((CV_ICC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_SSSE3 "Enable SSSE3 instructions" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_SSE41 "Enable SSE4.1 instructions" OFF IF (CV_ICC OR CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_SSE41 "Enable SSE4.1 instructions" OFF IF ((CV_ICC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_SSE42 "Enable SSE4.2 instructions" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_NOISY_WARNINGS "Show all warnings even if they are too noisy" OFF )
OCV_OPTION(OPENCV_WARNINGS_ARE_ERRORS "Treat warnings as errors" OFF )
@ -336,6 +336,7 @@ include(cmake/OpenCVCompilerOptions.cmake REQUIRED)
# ----------------------------------------------------------------------------
if(MSVC)
include(cmake/OpenCVCRTLinkage.cmake REQUIRED)
add_definitions(-D_VARIADIC_MAX=10)
endif(MSVC)

View File

@ -1,6 +1,6 @@
# ------------------------------------------------------------------------------
# Android CMake toolchain file, for use with the Android NDK r5-r8
# Requires cmake 2.6.3 or newer (2.8.3 or newer is recommended).
# Requires cmake 2.6.3 or newer (2.8.5 or newer is recommended).
# See home page: http://code.google.com/p/android-cmake/
#
# The file is maintained by the OpenCV project and can also be found at
@ -44,7 +44,8 @@
# ANDROID_ABI=armeabi-v7a - specifies the target Application Binary
# Interface (ABI). This option nearly matches to the APP_ABI variable
# used by ndk-build tool from Android NDK.
# Possible values are:
#
# Possible targets are:
# "armeabi" - matches to the NDK ABI with the same name.
# See ${ANDROID_NDK}/docs/CPU-ARCH-ABIS.html for the documentation.
# "armeabi-v7a" - matches to the NDK ABI with the same name.
@ -56,6 +57,8 @@
# "armeabi-v6 with VFP" - tuned for ARMv6 processors having VFP.
# "x86" - matches to the NDK ABI with the same name.
# See ${ANDROID_NDK}/docs/CPU-ARCH-ABIS.html for the documentation.
# "mips" - matches to the NDK ABI with the same name
# (not tested on real devices)
#
# ANDROID_NATIVE_API_LEVEL=android-8 - level of Android API compile for.
# Option is read-only when standalone toolchain used.
@ -183,12 +186,13 @@
# - modified August 2012
# [+] updated for NDK r8b
# [~] all intermediate files generated by toolchain are moved into CMakeFiles
# [~] libstdc++ and libsupc++ are removed from explicit link libraries
# ------------------------------------------------------------------------------
cmake_minimum_required( VERSION 2.6.3 )
if( DEFINED CMAKE_CROSSCOMPILING )
#subsequent toolchain loading is not really needed
# subsequent toolchain loading is not really needed
return()
endif()
@ -199,7 +203,7 @@ endif()
# this one is important
set( CMAKE_SYSTEM_NAME Linux )
#this one not so much
# this one not so much
set( CMAKE_SYSTEM_VERSION 1 )
set( ANDROID_SUPPORTED_NDK_VERSIONS ${ANDROID_EXTRA_NDK_VERSIONS} -r8b -r8 -r7c -r7b -r7 -r6b -r6 -r5c -r5b -r5 "" )
@ -331,11 +335,11 @@ macro( __COPY_IF_DIFFERENT _source _destination )
endmacro()
#stl version: by default gnustl_static will be used
# stl version: by default gnustl_static will be used
set( ANDROID_USE_STLPORT FALSE CACHE BOOL "Experimental: use stlport_static instead of gnustl_static")
mark_as_advanced( ANDROID_USE_STLPORT )
#fight against cygwin
# fight against cygwin
set( ANDROID_FORBID_SYGWIN TRUE CACHE BOOL "Prevent cmake from working under cygwin and using cygwin tools")
mark_as_advanced( ANDROID_FORBID_SYGWIN )
if( ANDROID_FORBID_SYGWIN )
@ -344,7 +348,7 @@ if( ANDROID_FORBID_SYGWIN )
endif()
if( CMAKE_HOST_WIN32 )
#remove cygwin from PATH
# remove cygwin from PATH
set( __new_path "$ENV{PATH}")
__LIST_FILTER( __new_path "cygwin" )
set(ENV{PATH} "${__new_path}")
@ -352,7 +356,7 @@ if( ANDROID_FORBID_SYGWIN )
endif()
endif()
#detect current host platform
# detect current host platform
set( TOOL_OS_SUFFIX "" )
if( CMAKE_HOST_APPLE )
set( ANDROID_NDK_HOST_SYSTEM_NAME "darwin-x86" )
@ -365,10 +369,10 @@ else()
message( FATAL_ERROR "Cross-compilation on your platform is not supported by this cmake toolchain" )
endif()
#see if we have path to Android NDK
# see if we have path to Android NDK
__INIT_VARIABLE( ANDROID_NDK PATH ENV_ANDROID_NDK )
if( NOT ANDROID_NDK )
#see if we have path to Android standalone toolchain
# see if we have path to Android standalone toolchain
__INIT_VARIABLE( ANDROID_STANDALONE_TOOLCHAIN PATH ENV_ANDROID_STANDALONE_TOOLCHAIN OBSOLETE_ANDROID_NDK_TOOLCHAIN_ROOT OBSOLETE_ENV_ANDROID_NDK_TOOLCHAIN_ROOT )
if( NOT ANDROID_STANDALONE_TOOLCHAIN )
@ -397,10 +401,10 @@ if( NOT ANDROID_NDK )
endif( NOT ANDROID_STANDALONE_TOOLCHAIN )
endif( NOT ANDROID_NDK )
#remember found paths
# remember found paths
if( ANDROID_NDK )
get_filename_component( ANDROID_NDK "${ANDROID_NDK}" ABSOLUTE )
#try to detect change
# try to detect change
if( CMAKE_AR )
string( LENGTH "${ANDROID_NDK}" __length )
string( SUBSTRING "${CMAKE_AR}" 0 ${__length} __androidNdkPreviousPath )
@ -414,7 +418,7 @@ if( ANDROID_NDK )
set( BUILD_WITH_ANDROID_NDK True )
elseif( ANDROID_STANDALONE_TOOLCHAIN )
get_filename_component( ANDROID_STANDALONE_TOOLCHAIN "${ANDROID_STANDALONE_TOOLCHAIN}" ABSOLUTE )
#try to detect change
# try to detect change
if( CMAKE_AR )
string( LENGTH "${ANDROID_STANDALONE_TOOLCHAIN}" __length )
string( SUBSTRING "${CMAKE_AR}" 0 ${__length} __androidStandaloneToolchainPreviousPath )
@ -438,7 +442,7 @@ else()
sudo ln -s ~/my-android-toolchain ${ANDROID_STANDALONE_TOOLCHAIN_SEARCH_PATH}" )
endif()
#get all the details about standalone toolchain
# get all the details about standalone toolchain
if( BUILD_WITH_STANDALONE_TOOLCHAIN )
__DETECT_NATIVE_API_LEVEL( ANDROID_SUPPORTED_NATIVE_API_LEVELS "${ANDROID_STANDALONE_TOOLCHAIN}/sysroot/usr/include/android/api-level.h" )
set( ANDROID_STANDALONE_TOOLCHAIN_API_LEVEL ${ANDROID_SUPPORTED_NATIVE_API_LEVELS} )
@ -455,7 +459,7 @@ if( BUILD_WITH_STANDALONE_TOOLCHAIN )
set( __availableToolchainArchs "mipsel" )
endif()
if( ANDROID_COMPILER_VERSION )
#do not run gcc every time because it is relatively expensive
# do not run gcc every time because it is relatively expensive
set( __availableToolchainCompilerVersions "${ANDROID_COMPILER_VERSION}" )
else()
execute_process( COMMAND "${ANDROID_STANDALONE_TOOLCHAIN}/bin/${__availableToolchainMachines}-gcc${TOOL_OS_SUFFIX}" --version
@ -464,7 +468,7 @@ if( BUILD_WITH_STANDALONE_TOOLCHAIN )
endif()
endif()
#get all the details about NDK
# get all the details about NDK
if( BUILD_WITH_ANDROID_NDK )
file( GLOB ANDROID_SUPPORTED_NATIVE_API_LEVELS RELATIVE "${ANDROID_NDK}/platforms" "${ANDROID_NDK}/platforms/android-*" )
string( REPLACE "android-" "" ANDROID_SUPPORTED_NATIVE_API_LEVELS "${ANDROID_SUPPORTED_NATIVE_API_LEVELS}" )
@ -490,7 +494,7 @@ if( BUILD_WITH_ANDROID_NDK )
endif()
endif()
#build list of available ABIs
# build list of available ABIs
if( NOT ANDROID_SUPPORTED_ABIS )
set( ANDROID_SUPPORTED_ABIS "" )
set( __uniqToolchainArchNames ${__availableToolchainArchs} )
@ -505,9 +509,9 @@ if( NOT ANDROID_SUPPORTED_ABIS )
endif()
endif()
#choose target ABI
# choose target ABI
__INIT_VARIABLE( ANDROID_ABI OBSOLETE_ARM_TARGET OBSOLETE_ARM_TARGETS VALUES ${ANDROID_SUPPORTED_ABIS} )
#verify that target ABI is supported
# verify that target ABI is supported
list( FIND ANDROID_SUPPORTED_ABIS "${ANDROID_ABI}" __androidAbiIdx )
if( __androidAbiIdx EQUAL -1 )
string( REPLACE ";" "\", \"" PRINTABLE_ANDROID_SUPPORTED_ABIS "${ANDROID_SUPPORTED_ABIS}" )
@ -517,10 +521,10 @@ if( __androidAbiIdx EQUAL -1 )
endif()
unset( __androidAbiIdx )
#remember target ABI
# remember target ABI
set( ANDROID_ABI "${ANDROID_ABI}" CACHE STRING "The target ABI for Android. If arm, then armeabi-v7a is recommended for hardware floating point." FORCE )
#set target ABI options
# set target ABI options
if( ANDROID_ABI STREQUAL "x86" )
set( X86 true )
set( ANDROID_NDK_ABI_NAME "x86" )
@ -545,7 +549,7 @@ elseif( ANDROID_ABI STREQUAL "armeabi-v6 with VFP" )
set( ANDROID_ARCH_NAME "arm" )
set( ANDROID_ARCH_FULLNAME "arm" )
set( CMAKE_SYSTEM_PROCESSOR "armv6" )
#need always fallback to older platform
# need always fallback to older platform
set( ARMEABI true )
elseif( ANDROID_ABI STREQUAL "armeabi-v7a")
set( ARMEABI_V7A true )
@ -573,8 +577,8 @@ else()
endif()
if( CMAKE_BINARY_DIR AND EXISTS "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeSystem.cmake" )
#really dirty hack
#it is not possible to change CMAKE_SYSTEM_PROCESSOR after the first run...
# really dirty hack
# it is not possible to change CMAKE_SYSTEM_PROCESSOR after the first run...
file( APPEND "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeSystem.cmake" "SET(CMAKE_SYSTEM_PROCESSOR \"${CMAKE_SYSTEM_PROCESSOR}\")\n" )
endif()
@ -592,7 +596,7 @@ else()
unset( ANDROID_FORCE_ARM_BUILD CACHE )
endif()
#choose toolchain
# choose toolchain
if( ANDROID_TOOLCHAIN_NAME )
list( FIND __availableToolchains "${ANDROID_TOOLCHAIN_NAME}" __toolchainIdx )
if( __toolchainIdx EQUAL -1 )
@ -637,10 +641,10 @@ unset( __availableToolchainMachines )
unset( __availableToolchainArchs )
unset( __availableToolchainCompilerVersions )
#choose native API level
# choose native API level
__INIT_VARIABLE( ANDROID_NATIVE_API_LEVEL ENV_ANDROID_NATIVE_API_LEVEL ANDROID_API_LEVEL ENV_ANDROID_API_LEVEL ANDROID_STANDALONE_TOOLCHAIN_API_LEVEL ANDROID_DEFAULT_NDK_API_LEVEL_${ANDROID_ARCH_NAME} ANDROID_DEFAULT_NDK_API_LEVEL )
string( REGEX MATCH "[0-9]+" ANDROID_NATIVE_API_LEVEL "${ANDROID_NATIVE_API_LEVEL}" )
#validate
# validate
list( FIND ANDROID_SUPPORTED_NATIVE_API_LEVELS "${ANDROID_NATIVE_API_LEVEL}" __levelIdx )
if( __levelIdx EQUAL -1 )
message( SEND_ERROR "Specified Android native API level (${ANDROID_NATIVE_API_LEVEL}) is not supported by your NDK/toolchain." )
@ -659,7 +663,7 @@ if( CMAKE_VERSION VERSION_GREATER "2.8" )
set_property( CACHE ANDROID_NATIVE_API_LEVEL PROPERTY STRINGS ${ANDROID_SUPPORTED_NATIVE_API_LEVELS} )
endif()
#setup paths
# setup paths
if( BUILD_WITH_STANDALONE_TOOLCHAIN )
set( ANDROID_TOOLCHAIN_ROOT "${ANDROID_STANDALONE_TOOLCHAIN}" )
set( ANDROID_SYSROOT "${ANDROID_STANDALONE_TOOLCHAIN}/sysroot" )
@ -689,7 +693,7 @@ set( CMAKE_ASM_COMPILER "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHI
if( CMAKE_VERSION VERSION_LESS 2.8.5 )
set( CMAKE_ASM_COMPILER_ARG1 "-c" )
endif()
#there may be a way to make cmake deduce these TODO deduce the rest of the tools
# there may be a way to make cmake deduce these TODO deduce the rest of the tools
set( CMAKE_STRIP "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-strip${TOOL_OS_SUFFIX}" CACHE PATH "strip" )
set( CMAKE_AR "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-ar${TOOL_OS_SUFFIX}" CACHE PATH "archive" )
set( CMAKE_LINKER "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-ld${TOOL_OS_SUFFIX}" CACHE PATH "linker" )
@ -705,11 +709,12 @@ if( APPLE )
endif()
mark_as_advanced( CMAKE_INSTALL_NAME_TOOL )
endif()
#export directories
# export directories
set( ANDROID_SYSTEM_INCLUDE_DIRS "" )
set( ANDROID_SYSTEM_LIB_DIRS "" )
#setup output directories
# setup output directories
set( LIBRARY_OUTPUT_PATH_ROOT ${CMAKE_SOURCE_DIR} CACHE PATH "root for library output, set this to change where android libs are installed to" )
set( CMAKE_INSTALL_PREFIX "${ANDROID_TOOLCHAIN_ROOT}/user" CACHE STRING "path for installing" )
@ -722,13 +727,13 @@ if(NOT _CMAKE_IN_TRY_COMPILE)
set( LIBRARY_OUTPUT_PATH "${LIBRARY_OUTPUT_PATH_ROOT}/libs/${ANDROID_NDK_ABI_NAME}" CACHE PATH "path for android libs" )
endif()
#includes
# includes
list( APPEND ANDROID_SYSTEM_INCLUDE_DIRS "${ANDROID_SYSROOT}/usr/include" )
if( __stlIncludePath AND EXISTS "${__stlIncludePath}" )
list( APPEND ANDROID_SYSTEM_INCLUDE_DIRS "${__stlIncludePath}" )
endif()
#STL bits includes
# c++ bits includes
if( __stlLibPath AND EXISTS "${__stlLibPath}/include" )
list( APPEND ANDROID_SYSTEM_INCLUDE_DIRS "${__stlLibPath}/include" )
endif()
@ -742,7 +747,7 @@ elseif( EXISTS "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/incl
list( APPEND ANDROID_SYSTEM_INCLUDE_DIRS "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/include/c++/${ANDROID_COMPILER_VERSION}/${ANDROID_TOOLCHAIN_MACHINE_NAME}" )
endif()
#flags and definitions
# flags and definitions
if(ANDROID_SYSROOT MATCHES "[ ;\"]")
set( ANDROID_CXX_FLAGS "--sysroot=\"${ANDROID_SYSROOT}\"" )
# quotes will break try_compile and compiler identification
@ -766,7 +771,7 @@ set( CMAKE_CXX_PLATFORM_ID Linux )
set( CMAKE_CXX_SIZEOF_DATA_PTR 4 )
set( CMAKE_CXX_HAS_ISYSROOT 1 )
set( CMAKE_CXX_COMPILER_ABI ELF )
#force ASM compiler (required for CMake < 2.8.5)
# force ASM compiler (required for CMake < 2.8.5)
set( CMAKE_ASM_COMPILER_ID_RUN TRUE )
set( CMAKE_ASM_COMPILER_ID GNU )
set( CMAKE_ASM_COMPILER_WORKS TRUE )
@ -796,17 +801,17 @@ endif()
if( ANDROID_USE_STLPORT )
set( _CMAKE_CXX_FLAGS "${_CMAKE_CXX_FLAGS} -fno-rtti -fno-exceptions" )
set( _CMAKE_C_FLAGS "${_CMAKE_C_FLAGS} -fno-rtti -fno-exceptions" )
set( _CMAKE_C_FLAGS "${_CMAKE_C_FLAGS} -fno-exceptions" )
else()
set( _CMAKE_CXX_FLAGS "${_CMAKE_CXX_FLAGS} -frtti -fexceptions" )
set( _CMAKE_C_FLAGS "${_CMAKE_C_FLAGS} -fexceptions" )
endif()
#release and debug flags
# release and debug flags
if( ARMEABI OR ARMEABI_V7A )
if( NOT ANDROID_FORCE_ARM_BUILD AND NOT ARMEABI_V6 )
#It is recommended to use the -mthumb compiler flag to force the generation
#of 16-bit Thumb-1 instructions (the default being 32-bit ARM ones).
# It is recommended to use the -mthumb compiler flag to force the generation
# of 16-bit Thumb-1 instructions (the default being 32-bit ARM ones).
# O3 instead of O2/Os in release mode - like cmake sets for desktop gcc
set( _CMAKE_CXX_FLAGS_RELEASE "-mthumb -O3" )
set( _CMAKE_C_FLAGS_RELEASE "-mthumb -O3" )
@ -836,7 +841,7 @@ set( _CMAKE_C_FLAGS_RELEASE "${_CMAKE_C_FLAGS_RELEASE} -fomit-frame-pointer
set( _CMAKE_CXX_FLAGS_DEBUG "${_CMAKE_CXX_FLAGS_DEBUG} -fno-strict-aliasing -fno-omit-frame-pointer -DDEBUG -D_DEBUG" )
set( _CMAKE_C_FLAGS_DEBUG "${_CMAKE_C_FLAGS_DEBUG} -fno-strict-aliasing -fno-omit-frame-pointer -DDEBUG -D_DEBUG" )
#ABI-specific flags
# ABI-specific flags
if( ARMEABI_V7A )
set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -march=armv7-a -mfloat-abi=softfp" )
if( NEON )
@ -854,19 +859,18 @@ elseif( X86 )
set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS}" )#sse?
endif()
#linker flags
# linker flags
if( NOT DEFINED __ndklibspath )
set( __ndklibspath "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/ndklibs/${ANDROID_NDK_ABI_NAME}" )
endif()
list( APPEND ANDROID_SYSTEM_LIB_DIRS "${__ndklibspath}" "${CMAKE_INSTALL_PREFIX}/libs/${ANDROID_NDK_ABI_NAME}" )
list( APPEND ANDROID_SYSTEM_LIB_DIRS "${CMAKE_INSTALL_PREFIX}/libs/${ANDROID_NDK_ABI_NAME}" )
set( ANDROID_LINKER_FLAGS "" )
#STL
# STL
if( ANDROID_USE_STLPORT )
if( EXISTS "${__stlLibPath}/libstlport_static.a" )
__COPY_IF_DIFFERENT( "${__stlLibPath}/libstlport_static.a" "${__ndklibspath}/libstlport_static.a" )
endif()
if( EXISTS "${__ndklibspath}/libstlport_static.a" )
set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,--start-group -lstlport_static" )
set( CMAKE_CXX_CREATE_SHARED_LIBRARY "<CMAKE_CXX_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <SONAME_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES> \"${__stlLibPath}/libstlport_static.a\"")
set( CMAKE_CXX_CREATE_SHARED_MODULE "<CMAKE_CXX_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <SONAME_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES> \"${__stlLibPath}/libstlport_static.a\"")
endif()
else( ANDROID_USE_STLPORT )
if( EXISTS "${__stlLibPath}/libgnustl_static.a" )
@ -880,11 +884,6 @@ else( ANDROID_USE_STLPORT )
elseif( EXISTS "${__stlLibPath}/libstdc++.a" )
__COPY_IF_DIFFERENT( "${__stlLibPath}/libstdc++.a" "${__ndklibspath}/libstdc++.a" )
endif()
if( EXISTS "${__ndklibspath}/libstdc++.a" )
set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -lstdc++" )
endif()
#gcc exception & rtti support
if( EXISTS "${__stlLibPath}/libsupc++.a" )
__COPY_IF_DIFFERENT( "${__stlLibPath}/libsupc++.a" "${__ndklibspath}/libsupc++.a" )
elseif( ANDROID_ARCH_NAME STREQUAL "arm" AND EXISTS "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/thumb/libsupc++.a" )
@ -896,16 +895,14 @@ else( ANDROID_USE_STLPORT )
elseif( EXISTS "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/libsupc++.a" )
__COPY_IF_DIFFERENT( "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/libsupc++.a" "${__ndklibspath}/libsupc++.a" )
endif()
if( EXISTS "${__ndklibspath}/libsupc++.a" )
set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -lsupc++" )
endif()
list( APPEND ANDROID_SYSTEM_LIB_DIRS "${__ndklibspath}" )
endif( ANDROID_USE_STLPORT )
#cleanup for STL search
# cleanup for STL search
unset( __stlIncludePath )
unset( __stlLibPath )
#other linker flags
# other linker flags
__INIT_VARIABLE( ANDROID_NO_UNDEFINED OBSOLETE_NO_UNDEFINED VALUES ON )
set( ANDROID_NO_UNDEFINED ${ANDROID_NO_UNDEFINED} CACHE BOOL "Show all undefined symbols as linker errors" FORCE )
mark_as_advanced( ANDROID_NO_UNDEFINED )
@ -914,7 +911,7 @@ if( ANDROID_NO_UNDEFINED )
endif()
if (ANDROID_NDK MATCHES "-r[56].?$")
#libGLESv2.so in NDKs prior to r7 refers to external symbols. So this flag option is required for all projects using OpenGL from native.
# libGLESv2.so in NDKs prior to r7 refers to external symbols. So this flag option is required for all projects using OpenGL from native.
__INIT_VARIABLE( ANDROID_SO_UNDEFINED VALUES ON )
else()
__INIT_VARIABLE( ANDROID_SO_UNDEFINED VALUES OFF )
@ -940,7 +937,7 @@ if( ARMEABI_V7A )
set( ANDROID_LINKER_FLAGS "-Wl,--fix-cortex-a8 ${ANDROID_LINKER_FLAGS}" )
endif()
#cache flags
# cache flags
set( CMAKE_CXX_FLAGS "${_CMAKE_CXX_FLAGS}" CACHE STRING "c++ flags" )
set( CMAKE_C_FLAGS "${_CMAKE_C_FLAGS}" CACHE STRING "c flags" )
set( CMAKE_CXX_FLAGS_RELEASE "${_CMAKE_CXX_FLAGS_RELEASE}" CACHE STRING "c++ Release flags" )
@ -954,7 +951,7 @@ set( CMAKE_EXE_LINKER_FLAGS "-Wl,-z,nocopyreloc" CACHE STRING "linker flags" )
include_directories( SYSTEM ${ANDROID_SYSTEM_INCLUDE_DIRS} )
link_directories( ${ANDROID_SYSTEM_LIB_DIRS} )
#finish flags
# finish flags
set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS}" CACHE INTERNAL "Extra Android compiler flags")
set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS}" CACHE INTERNAL "Extra Android linker flags")
set( CMAKE_CXX_FLAGS "${ANDROID_CXX_FLAGS} ${CMAKE_CXX_FLAGS}" )
@ -969,7 +966,7 @@ else()
set( CMAKE_EXE_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS}" )
endif()
#set these global flags for cmake client scripts to change behavior
# set these global flags for cmake client scripts to change behavior
set( ANDROID True )
set( BUILD_ANDROID True )
@ -982,7 +979,7 @@ set( CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY )
set( CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY )
#macro to find packages on the host OS
# macro to find packages on the host OS
macro( find_host_package )
set( CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER )
set( CMAKE_FIND_ROOT_PATH_MODE_LIBRARY NEVER )
@ -1004,7 +1001,7 @@ macro( find_host_package )
endmacro()
#macro to find programs on the host OS
# macro to find programs on the host OS
macro( find_host_program )
set( CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER )
set( CMAKE_FIND_ROOT_PATH_MODE_LIBRARY NEVER )
@ -1044,7 +1041,11 @@ if( NOT PROJECT_NAME STREQUAL "CMAKE_TRY_COMPILE" )
set( __toolchain_config "")
foreach( __var ANDROID_ABI ANDROID_FORCE_ARM_BUILD ANDROID_NATIVE_API_LEVEL ANDROID_NO_UNDEFINED ANDROID_SO_UNDEFINED ANDROID_SET_OBSOLETE_VARIABLES LIBRARY_OUTPUT_PATH_ROOT ANDROID_USE_STLPORT ANDROID_FORBID_SYGWIN ANDROID_NDK ANDROID_STANDALONE_TOOLCHAIN ANDROID_FUNCTION_LEVEL_LINKING __ndklibspath )
if( DEFINED ${__var} )
set( __toolchain_config "${__toolchain_config}set( ${__var} \"${${__var}}\" )\n" )
if( "${__var}" MATCHES " ")
set( __toolchain_config "${__toolchain_config}set( ${__var} \"${${__var}}\" CACHE INTERNAL \"\" )\n" )
else()
set( __toolchain_config "${__toolchain_config}set( ${__var} ${${__var}} CACHE INTERNAL \"\" )\n" )
endif()
endif()
endforeach()
file( WRITE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/android.toolchain.config.cmake" "${__toolchain_config}" )

View File

@ -0,0 +1,138 @@
# CMAKE_PARSE_ARGUMENTS(<prefix> <options> <one_value_keywords> <multi_value_keywords> args...)
#
# CMAKE_PARSE_ARGUMENTS() is intended to be used in macros or functions for
# parsing the arguments given to that macro or function.
# It processes the arguments and defines a set of variables which hold the
# values of the respective options.
#
# The <options> argument contains all options for the respective macro,
# i.e. keywords which can be used when calling the macro without any value
# following, like e.g. the OPTIONAL keyword of the install() command.
#
# The <one_value_keywords> argument contains all keywords for this macro
# which are followed by one value, like e.g. DESTINATION keyword of the
# install() command.
#
# The <multi_value_keywords> argument contains all keywords for this macro
# which can be followed by more than one value, like e.g. the TARGETS or
# FILES keywords of the install() command.
#
# When done, CMAKE_PARSE_ARGUMENTS() will have defined for each of the
# keywords listed in <options>, <one_value_keywords> and
# <multi_value_keywords> a variable composed of the given <prefix>
# followed by "_" and the name of the respective keyword.
# These variables will then hold the respective value from the argument list.
# For the <options> keywords this will be TRUE or FALSE.
#
# All remaining arguments are collected in a variable
# <prefix>_UNPARSED_ARGUMENTS, this can be checked afterwards to see whether
# your macro was called with unrecognized parameters.
#
# As an example, here is a my_install() macro, which takes similar arguments to the
# real install() command:
#
# function(MY_INSTALL)
# set(options OPTIONAL FAST)
# set(oneValueArgs DESTINATION RENAME)
# set(multiValueArgs TARGETS CONFIGURATIONS)
# cmake_parse_arguments(MY_INSTALL "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} )
# ...
#
# Assume my_install() has been called like this:
# my_install(TARGETS foo bar DESTINATION bin OPTIONAL blub)
#
# After the cmake_parse_arguments() call the macro will have set the following
# variables:
# MY_INSTALL_OPTIONAL = TRUE
# MY_INSTALL_FAST = FALSE (this option was not used when calling my_install())
# MY_INSTALL_DESTINATION = "bin"
# MY_INSTALL_RENAME = "" (was not used)
# MY_INSTALL_TARGETS = "foo;bar"
# MY_INSTALL_CONFIGURATIONS = "" (was not used)
# MY_INSTALL_UNPARSED_ARGUMENTS = "blub" (no value expected after "OPTIONAL")
#
# You can then continue and process these variables.
#
# Keywords terminate lists of values, e.g. if directly after a one_value_keyword
# another recognized keyword follows, this is interpreted as the beginning of
# the new option.
# E.g. my_install(TARGETS foo DESTINATION OPTIONAL) would not set
# MY_INSTALL_DESTINATION to "OPTIONAL"; instead MY_INSTALL_DESTINATION would
# be empty and MY_INSTALL_OPTIONAL would be set to TRUE, because OPTIONAL is
# itself a recognized keyword.
#=============================================================================
# Copyright 2010 Alexander Neundorf <neundorf@kde.org>
#
# Distributed under the OSI-approved BSD License (the "License");
# see accompanying file Copyright.txt for details.
#
# This software is distributed WITHOUT ANY WARRANTY; without even the
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the License for more information.
#=============================================================================
# (To distribute this file outside of CMake, substitute the full
# License text for the above reference.)
if(__CMAKE_PARSE_ARGUMENTS_INCLUDED)
return()
endif()
set(__CMAKE_PARSE_ARGUMENTS_INCLUDED TRUE)
function(CMAKE_PARSE_ARGUMENTS prefix _optionNames _singleArgNames _multiArgNames)
# first set all result variables to empty/FALSE
foreach(arg_name ${_singleArgNames} ${_multiArgNames})
set(${prefix}_${arg_name})
endforeach(arg_name)
foreach(option ${_optionNames})
set(${prefix}_${option} FALSE)
endforeach(option)
set(${prefix}_UNPARSED_ARGUMENTS)
set(insideValues FALSE)
set(currentArgName)
# now iterate over all arguments and fill the result variables
foreach(currentArg ${ARGN})
list(FIND _optionNames "${currentArg}" optionIndex) # ... then this marks the end of the arguments belonging to this keyword
list(FIND _singleArgNames "${currentArg}" singleArgIndex) # ... then this marks the end of the arguments belonging to this keyword
list(FIND _multiArgNames "${currentArg}" multiArgIndex) # ... then this marks the end of the arguments belonging to this keyword
if(${optionIndex} EQUAL -1 AND ${singleArgIndex} EQUAL -1 AND ${multiArgIndex} EQUAL -1)
if(insideValues)
if("${insideValues}" STREQUAL "SINGLE")
set(${prefix}_${currentArgName} ${currentArg})
set(insideValues FALSE)
elseif("${insideValues}" STREQUAL "MULTI")
list(APPEND ${prefix}_${currentArgName} ${currentArg})
endif()
else(insideValues)
list(APPEND ${prefix}_UNPARSED_ARGUMENTS ${currentArg})
endif(insideValues)
else()
if(NOT ${optionIndex} EQUAL -1)
set(${prefix}_${currentArg} TRUE)
set(insideValues FALSE)
elseif(NOT ${singleArgIndex} EQUAL -1)
set(currentArgName ${currentArg})
set(${prefix}_${currentArgName})
set(insideValues "SINGLE")
elseif(NOT ${multiArgIndex} EQUAL -1)
set(currentArgName ${currentArg})
set(${prefix}_${currentArgName})
set(insideValues "MULTI")
endif()
endif()
endforeach(currentArg)
# propagate the result variables to the caller:
foreach(arg_name ${_singleArgNames} ${_multiArgNames} ${_optionNames})
set(${prefix}_${arg_name} ${${prefix}_${arg_name}} PARENT_SCOPE)
endforeach(arg_name)
set(${prefix}_UNPARSED_ARGUMENTS ${${prefix}_UNPARSED_ARGUMENTS} PARENT_SCOPE)
endfunction(CMAKE_PARSE_ARGUMENTS _options _singleArgs _multiArgs)

View File

@ -88,7 +88,11 @@ if(CUDA_FOUND)
if(APPLE)
set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -fno-finite-math-only)
endif()
string(REPLACE "-Wsign-promo" "" CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}")
# disabled because of multiple warnings during building nvcc auto generated files
if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_GCC_REGEX_VERSION VERSION_GREATER "4.6.0")
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wunused-but-set-variable)
endif()
# we remove -ggdb3 flag as it leads to preprocessor errors when compiling CUDA files (CUDA 4.1)
set(CMAKE_CXX_FLAGS_DEBUG_ ${CMAKE_CXX_FLAGS_DEBUG})

View File

@ -21,7 +21,12 @@ elseif(UNIX AND NOT APPLE)
endif()
if(NOT HAVE_TBB)
set(TBB_DEFAULT_INCLUDE_DIRS "/opt/intel/tbb" "/usr/local/include" "/usr/include" "C:/Program Files/Intel/TBB" "C:/Program Files (x86)/Intel/TBB" "C:/Program Files (x86)/TBB" "${CMAKE_INSTALL_PREFIX}/include")
set(TBB_DEFAULT_INCLUDE_DIRS
"/opt/intel/tbb" "/usr/local/include" "/usr/include"
"C:/Program Files/Intel/TBB" "C:/Program Files (x86)/Intel/TBB"
"C:/Program Files (x86)/tbb/include"
"${CMAKE_INSTALL_PREFIX}/include")
find_path(TBB_INCLUDE_DIRS "tbb/tbb.h" PATHS ${TBB_INCLUDE_DIR} ${TBB_DEFAULT_INCLUDE_DIRS} DOC "The path to TBB headers")
if(TBB_INCLUDE_DIRS)

View File

@ -64,9 +64,14 @@ macro(ocv_generate_dependencies_map_configcmake suffix configuration)
string(REGEX REPLACE "${CMAKE_SHARED_LIBRARY_SUFFIX}$" "${OPENCV_LINK_LIBRARY_SUFFIX}" __libname "${__libname}")
endif()
string(REPLACE " " "\\ " __mod_deps "${${__ocv_lib}_MODULE_DEPS_${suffix}}")
string(REPLACE " " "\\ " __ext_deps "${${__ocv_lib}_EXTRA_DEPS_${suffix}}")
string(REPLACE "\"" "\\\"" __mod_deps "${__mod_deps}")
string(REPLACE "\"" "\\\"" __ext_deps "${__ext_deps}")
set(OPENCV_DEPENDENCIES_MAP_${suffix} "${OPENCV_DEPENDENCIES_MAP_${suffix}}set(OpenCV_${__ocv_lib}_LIBNAME_${suffix} \"${__libname}\")\n")
set(OPENCV_DEPENDENCIES_MAP_${suffix} "${OPENCV_DEPENDENCIES_MAP_${suffix}}set(OpenCV_${__ocv_lib}_DEPS_${suffix} ${${__ocv_lib}_MODULE_DEPS_${suffix}})\n")
set(OPENCV_DEPENDENCIES_MAP_${suffix} "${OPENCV_DEPENDENCIES_MAP_${suffix}}set(OpenCV_${__ocv_lib}_EXTRA_DEPS_${suffix} ${${__ocv_lib}_EXTRA_DEPS_${suffix}})\n")
set(OPENCV_DEPENDENCIES_MAP_${suffix} "${OPENCV_DEPENDENCIES_MAP_${suffix}}set(OpenCV_${__ocv_lib}_DEPS_${suffix} ${__mod_deps})\n")
set(OPENCV_DEPENDENCIES_MAP_${suffix} "${OPENCV_DEPENDENCIES_MAP_${suffix}}set(OpenCV_${__ocv_lib}_EXTRA_DEPS_${suffix} ${__ext_deps})\n")
list(APPEND OPENCV_PROCESSED_LIBS ${__ocv_lib})
list(APPEND OPENCV_LIBS_TO_PROCESS ${${__ocv_lib}_MODULE_DEPS_${suffix}})

View File

@ -509,8 +509,6 @@ endmacro()
macro(ocv_add_precompiled_headers the_target)
if("${the_target}" MATCHES "^opencv_test_.*$")
SET(pch_path "test/test_")
elseif("${the_target}" MATCHES "opencv_perf_gpu_cpu")
SET(pch_path "perf_cpu/perf_cpu_")
elseif("${the_target}" MATCHES "^opencv_perf_.*$")
SET(pch_path "perf/perf_")
else()

View File

@ -501,3 +501,12 @@ macro(ocv_parse_header2 LIBNAME HDR_PATH VARNAME)
endif()
endif()
endmacro()
################################################################################################
# short command to setup source group
function(ocv_source_group group)
cmake_parse_arguments(OCV_SOURCE_GROUP "" "" "GLOB" ${ARGN})
file(GLOB srcs ${OCV_SOURCE_GROUP_GLOB})
source_group(${group} FILES ${srcs})
endfunction()

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -175,6 +175,8 @@ a:hover {
div.body p, div.body dd, div.body li {
text-align: justify;
line-height: 130%;
margin-top: 1em;
margin-bottom: 1em;
}
div.body h1,
@ -327,9 +329,9 @@ table.field-list {
margin-top: 20px;
}
ul.simple {
/*ul.simple {
list-style: none;
}
}*/
em.menuselection, em.guilabel {
font-family: {{ theme_guifont }};
@ -384,3 +386,8 @@ margin-top: 0px;
div.body ul.search li {
text-align: left;
}
div.linenodiv {
min-width: 1em;
text-align: right;
}

View File

@ -202,7 +202,7 @@ Open OpenCV library and samples in Eclipse
Sometimes more advanced manipulations are required:
* The provided projects are configured for ``API 11`` target (and ``API 9`` for the library) that can be missing platform in your Android SDK.
The provided projects are configured for ``API 11`` target (and ``API 9`` for the library) that can be missing platform in your Android SDK.
After right click on any project select :guilabel:`Properties` and then :guilabel:`Android` on the left pane.
Click some target with `API Level` 11 or higher:

View File

@ -75,7 +75,7 @@ You need the following software to be installed in order to develop for Android
sudo update-java-alternatives --set java-6-sun
**TODO:** add a note on Sun/Oracle Java installation on Ubuntu 12.
.. **TODO:** add a note on Sun/Oracle Java installation on Ubuntu 12.
#. **Android SDK**
@ -241,6 +241,7 @@ where:
The script :file:`Android.mk` usually has the following structure:
.. code-block:: make
:linenos:
LOCAL_PATH := $(call my-dir)
@ -258,6 +259,7 @@ This is the minimal file :file:`Android.mk`, which builds C++ source code of an
Usually the file :file:`Application.mk` is optional, but in case of project using OpenCV, when STL and exceptions are used in C++, it also should be created. Example of the file :file:`Application.mk`:
.. code-block:: make
:linenos:
APP_STL := gnustl_static
APP_CPPFLAGS := -frtti -fexceptions
@ -337,7 +339,7 @@ We recommend the approach based on Eclipse :abbr:`CDT(C/C++ Development Tooling)
:alt: Configure CDT
:align: center
` `
And:
.. image:: images/eclipse_cdt_cfg2.png
:alt: Configure CDT
@ -350,6 +352,7 @@ We recommend the approach based on Eclipse :abbr:`CDT(C/C++ Development Tooling)
:align: center
#. Open :guilabel:`Project Properties -> C/C++ Build`, uncheck ``Use default build command``, replace "Build command" text from ``"make"`` to
``"${NDKROOT}/ndk-build.cmd"`` on Windows,
``"${NDKROOT}/ndk-build"`` on Linux and MacOS.
@ -393,7 +396,7 @@ We recommend the approach based on Eclipse :abbr:`CDT(C/C++ Development Tooling)
:alt: Configure CDT
:align: center
.. note:: The latest Android NDK **r8b** has a bit different STL headers path. So if you use this NDK version please use the following modified **Include** paths list:
.. note:: The latest Android NDK **r8b** uses a different STL headers path. So if you use this NDK release, add the following **Include** paths list instead:
::
@ -412,12 +415,16 @@ AVD
AVD (*Android Virtual Device*) is probably not the most convenient way to test an OpenCV-dependent application, but surely the most uncomplicated one to configure.
#. Assuming you already have *Android SDK* and *Eclipse IDE* installed, in Eclipse go :guilabel:`Window -> AVD Manager`.
**TBD:** how to start AVD Manager without Eclipse...
.. **TBD:** how to start AVD Manager without Eclipse...
#. Press the :guilabel:`New` button in :guilabel:`AVD Manager` window.
#. :guilabel:`Create new Android Virtual Device` window will let you select some properties for your new device, like target API level, size of SD-card and other.
.. image:: images/AVD_create.png
:alt: Configure builders
:align: center
#. When you click the :guilabel:`Create AVD` button, your new AVD will be available in :guilabel:`AVD Manager`.
#. Press :guilabel:`Start` to launch the device. Be aware that any AVD (a.k.a. Emulator) is usually much slower than a hardware Android device, so it may take up to several minutes to start.
#. Go :guilabel:`Run -> Run/Debug` in Eclipse IDE to run your application in regular or debugging mode. :guilabel:`Device Chooser` will let you choose among the running devices or to start a new one.
@ -435,22 +442,31 @@ Windows host computer
#. Attach the Android device to your PC with a USB cable.
#. Go to :guilabel:`Start Menu` and **right-click** on :guilabel:`Computer`. Select :guilabel:`Manage` in the context menu. You may be asked for Administrative permissions.
#. Select :guilabel:`Device Manager` in the left pane and find an unknown device in the list. You may try unplugging it and then plugging it back in to check whether it is your device that appears in the list.
.. image:: images/usb_device_connect_01.png
:alt: Unknown device
:align: center
#. Try your luck installing `Google USB drivers` without any modifications: **right-click** on the unknown device, select :guilabel:`Properties` menu item --> :guilabel:`Details` tab --> :guilabel:`Update Driver` button.
.. image:: images/usb_device_connect_05.png
:alt: Device properties
:align: center
#. Select :guilabel:`Browse computer for driver software`.
.. image:: images/usb_device_connect_06.png
:alt: Browse for driver
:align: center
#. Specify the path to :file:`<Android SDK folder>/extras/google/usb_driver/` folder.
.. image:: images/usb_device_connect_07.png
:alt: Browse for driver
:align: center
#. If you get a prompt about installing unverified drivers and then a success report, you have finished the USB driver installation.
.. image:: images/usb_device_connect_08.png
:alt: Install prompt
:align: center
@ -460,23 +476,33 @@ Windows host computer
.. image:: images/usb_device_connect_09.png
:alt: Installed OK
:align: center
#. Otherwise (if you get a failure like the one shown below), follow the next steps.
.. image:: images/usb_device_connect_12.png
:alt: No driver
:align: center
#. Again **right-click** on the unknown device, select :guilabel:`Properties --> Details --> Hardware Ids` and copy the line like ``USB\VID_XXXX&PID_XXXX&MI_XX``.
.. image:: images/usb_device_connect_02.png
:alt: Device properties details
:align: center
#. Now open file :file:`<Android SDK folder>/extras/google/usb_driver/android_winusb.inf`. Select either ``Google.NTx86`` or ``Google.NTamd64`` section depending on your host system architecture.
.. image:: images/usb_device_connect_03.png
:alt: "android_winusb.inf"
:align: center
#. There should be a record for your device similar to the existing ones; you need to add it manually.
.. image:: images/usb_device_connect_04.png
:alt: "android_winusb.inf"
:align: center
#. Save the :file:`android_winusb.inf` file and try to install the USB driver again.
.. image:: images/usb_device_connect_05.png
:alt: Device properties
:align: center
@ -492,7 +518,9 @@ Windows host computer
.. image:: images/usb_device_connect_07.png
:alt: Browse for driver
:align: center
#. This time installation should go successfully.
.. image:: images/usb_device_connect_08.png
:alt: Install prompt
:align: center
@ -502,11 +530,15 @@ Windows host computer
.. image:: images/usb_device_connect_09.png
:alt: Installed OK
:align: center
#. And an unknown device is now recognized as an Android phone.
.. image:: images/usb_device_connect_10.png
:alt: "Known" device
:align: center
#. Successful device USB connection can be verified in console via ``adb devices`` command.
.. image:: images/usb_device_connect_11.png
:alt: "adb devices"
:align: center
@ -523,7 +555,7 @@ By default Linux doesn't recognize Android devices, but it's easy to fix this is
Then restart your adb server (even better to restart the system), plug in your Android device and execute :command:`adb devices` command. You will see the list of attached devices:
.. image:: images/usb_device_connect_ubuntu.png
.. image:: images/usb_device_connect_ubuntu.png
:alt: List of attached devices
:align: center

View File

@ -57,7 +57,7 @@ Using async initialization is a **recommended** way for application development.
To run an OpenCV Manager-based application for the first time you need to install packages with the `OpenCV Manager` and `OpenCV binary pack` for your platform.
You can do it using Google Play Market or manually with ``adb`` tool:
.. code-block:: sh
.. code-block:: sh
:linenos:
<Android SDK path>/platform-tools/adb install <OpenCV4Android SDK path>/apk/OpenCV_2.4.2_Manager.apk
@ -266,12 +266,12 @@ It will be capable of accessing camera output, processing it and displaying the
#. Set name, target, package and minSDKVersion accordingly.
#. Create a new class (*File -> New -> Class*). Name it for example: *HelloOpenCVView*.
.. image:: images/dev_OCV_new_class.png
:alt: Add a new class.
:align: center
* It should extend *SurfaceView* class.
* It also should implement *SurfaceHolder.Callback*, *Runnable*.
#. Edit *HelloOpenCVView* class.
@ -279,7 +279,9 @@ It will be capable of accessing camera output, processing it and displaying the
* Add an *import* line for *android.content.context*.
* Modify autogenerated stubs: *HelloOpenCVView*, *surfaceCreated*, *surfaceDestroyed* and *surfaceChanged*.
.. code-block:: java
:linenos:
package com.hello.opencv.test;
@ -300,16 +302,18 @@ It will be capable of accessing camera output, processing it and displaying the
cameraRelease();
}
public void surfaceChanged(SurfaceHolder holder, int format, int width,
int height) {
public void surfaceChanged(SurfaceHolder holder, int format, int width, int height) {
cameraSetup(width, height);
}
//...
* Add *cameraOpen*, *cameraRelease* and *cameraSetup* voids as shown below.
* Also, don't forget to add the public void *run()* as follows:
.. code-block:: java
:linenos:
public void run() {
// TODO: loop { getFrame(), processFrame(), drawFrame() }
@ -327,11 +331,10 @@ It will be capable of accessing camera output, processing it and displaying the
// TODO setup camera
}
..
#. Create a new *Activity* (*New -> Other -> Android -> Android Activity*) and name it, for example: *HelloOpenCVActivity*. For this activity define *onCreate*, *onResume* and *onPause* voids.
.. code-block:: java
:linenos:
public void onCreate (Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
@ -359,11 +362,12 @@ It will be capable of accessing camera output, processing it and displaying the
});
ad.show();
}
}
#. Add the following permissions to the AndroidManifest.xml file:
.. code-block:: xml
:linenos:
</application>
@ -372,12 +376,15 @@ It will be capable of accessing camera output, processing it and displaying the
<uses-feature android:name="android.hardware.camera.autofocus" />
#. Reference OpenCV library within your project properties.
.. image:: images/dev_OCV_reference.png
:alt: Reference OpenCV library.
:align: center
#. We now need some code to handle the camera. Update the *HelloOpenCVView* class as follows:
.. code-block:: java
:linenos:
private VideoCapture mCamera;
@ -394,6 +401,7 @@ It will be capable of accessing camera output, processing it and displaying the
}
return true;
}
public void cameraRelease() {
synchronized(this) {
if (mCamera != null) {
@ -402,6 +410,7 @@ It will be capable of accessing camera output, processing it and displaying the
}
}
}
private void cameraSetup(int width, int height) {
synchronized (this) {
if (mCamera != null && mCamera.isOpened()) {
@ -425,7 +434,9 @@ It will be capable of accessing camera output, processing it and displaying the
}
#. The last step would be to update the *run()* void in *HelloOpenCVView* class as follows:
.. code-block:: java
:linenos:
public void run() {
while (true) {
@ -465,5 +476,3 @@ It will be capable of accessing camera output, processing it and displaying the
}
return bmp;
}

View File

@ -42,6 +42,8 @@ set (CMAKE_CXX_OSX_CURRENT_VERSION_FLAG "${CMAKE_C_OSX_CURRENT_VERSION_FLAG}")
set (CMAKE_C_FLAGS "")
set (CMAKE_CXX_FLAGS "-headerpad_max_install_names -fvisibility=hidden -fvisibility-inlines-hidden")
set (CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG -O3 -fomit-frame-pointer -ffast-math")
if (HAVE_FLAG_SEARCH_PATHS_FIRST)
set (CMAKE_C_LINK_FLAGS "-Wl,-search_paths_first ${CMAKE_C_LINK_FLAGS}")
set (CMAKE_CXX_LINK_FLAGS "-Wl,-search_paths_first ${CMAKE_CXX_LINK_FLAGS}")

View File

@ -416,10 +416,10 @@ void cv::triangulatePoints( InputArray _projMatr1, InputArray _projMatr2,
Mat points1 = _projPoints1.getMat(), points2 = _projPoints2.getMat();
if((points1.rows == 1 || points1.cols == 1) && points1.channels() == 2)
points1 = points1.reshape(1, points1.total()).t();
points1 = points1.reshape(1, static_cast<int>(points1.total())).t();
if((points2.rows == 1 || points2.cols == 1) && points2.channels() == 2)
points2 = points2.reshape(1, points2.total()).t();
points2 = points2.reshape(1, static_cast<int>(points2.total())).t();
CvMat cvMatr1 = matr1, cvMatr2 = matr2;
CvMat cvPoints1 = points1, cvPoints2 = points2;

View File

@ -48,6 +48,7 @@ using namespace std;
#include <string>
#include <iostream>
#include <fstream>
#include <functional>
#include <iterator>
#include <limits>
#include <numeric>

View File

@ -42,6 +42,7 @@
#include "test_precomp.hpp"
#include "test_chessboardgenerator.hpp"
#include <functional>
#include <limits>
#include <numeric>

View File

@ -3,13 +3,14 @@ ocv_add_module(core ${ZLIB_LIBRARIES})
ocv_module_include_directories(${ZLIB_INCLUDE_DIR})
if(HAVE_CUDA)
file(GLOB lib_cuda "src/cuda/*.cu")
source_group("Cuda" FILES "${lib_cuda}")
ocv_source_group("Src\\Cuda" GLOB "src/cuda/*.cu")
ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpu/src" "${OpenCV_SOURCE_DIR}/modules/gpu/src/cuda" ${CUDA_INCLUDE_DIRS})
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef)
file(GLOB lib_cuda "src/cuda/*.cu")
ocv_cuda_compile(cuda_objs ${lib_cuda})
set(cuda_link_libs ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY})
else()
set(lib_cuda "")

View File

@ -440,7 +440,7 @@ template<typename _Tp, int m, int n> class CV_EXPORTS Matx
{
public:
typedef _Tp value_type;
typedef Matx<_Tp, MIN(m, n), 1> diag_type;
typedef Matx<_Tp, (m < n ? m : n), 1> diag_type;
typedef Matx<_Tp, m, n> mat_type;
enum { depth = DataDepth<_Tp>::value, rows = m, cols = n, channels = rows*cols,
type = CV_MAKETYPE(depth, channels) };
@ -4620,6 +4620,34 @@ public:
CV_EXPORTS void parallel_for_(const Range& range, const ParallelLoopBody& body);
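A hedged usage sketch for the parallel_for_ API declared above (not part of this commit): a made-up ParallelLoopBody subclass that squares the rows of a CV_32FC1 matrix, each worker handling the sub-range it receives.

#include <opencv2/core/core.hpp>

// Made-up example body: squares every element of the rows in the given sub-range.
struct SquareRowsBody : public cv::ParallelLoopBody
{
    cv::Mat& data; // assumed to be CV_32FC1
    explicit SquareRowsBody(cv::Mat& m) : data(m) {}

    virtual void operator()(const cv::Range& range) const
    {
        for (int r = range.start; r < range.end; ++r)
        {
            float* row = data.ptr<float>(r);
            for (int c = 0; c < data.cols; ++c)
                row[c] *= row[c];
        }
    }
};

void squareRowsInParallel(cv::Mat& m)
{
    SquareRowsBody body(m);
    cv::parallel_for_(cv::Range(0, m.rows), body); // the range is split across the selected backend
}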
/////////////////////////// Synchronization Primitives ///////////////////////////////
class CV_EXPORTS Mutex
{
public:
Mutex();
~Mutex();
Mutex(const Mutex& m);
Mutex& operator = (const Mutex& m);
void lock();
bool trylock();
void unlock();
struct Impl;
protected:
Impl* impl;
};
class CV_EXPORTS AutoLock
{
public:
AutoLock(Mutex& m) : mutex(&m) { mutex->lock(); }
~AutoLock() { mutex->unlock(); }
protected:
Mutex* mutex;
};
}
#endif // __cplusplus
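A hedged sketch of how the cv::Mutex / cv::AutoLock pair declared above is intended to be used (the shared counter is a made-up example, not part of this commit):

#include <opencv2/core/core.hpp>

static cv::Mutex g_counterMutex; // protects g_counter
static int g_counter = 0;

void incrementCounter()
{
    cv::AutoLock lock(g_counterMutex); // lock() runs in the constructor
    ++g_counter;                       // critical section
}                                      // unlock() runs when 'lock' leaves scope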

View File

@ -72,9 +72,11 @@ namespace cv { namespace gpu
FEATURE_SET_COMPUTE_13 = 13,
FEATURE_SET_COMPUTE_20 = 20,
FEATURE_SET_COMPUTE_21 = 21,
FEATURE_SET_COMPUTE_30 = 30,
GLOBAL_ATOMICS = FEATURE_SET_COMPUTE_11,
SHARED_ATOMICS = FEATURE_SET_COMPUTE_12,
NATIVE_DOUBLE = FEATURE_SET_COMPUTE_13
NATIVE_DOUBLE = FEATURE_SET_COMPUTE_13,
WARP_SHUFFLE_FUNCTIONS = FEATURE_SET_COMPUTE_30
};
// Gives information about what GPU archs this OpenCV GPU module was

View File

@ -282,6 +282,11 @@ namespace cv { namespace gpu { namespace device
cv::gpu::device::transform((DevMem2D_<T>)src, (DevMem2D_<D>)dst, op, WithOutMask(), stream);
}
#if defined __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wmissing-declarations"
#endif
void convert_gpu(DevMem2Db src, int sdepth, DevMem2Db dst, int ddepth, double alpha, double beta, cudaStream_t stream)
{
typedef void (*caller_t)(DevMem2Db src, DevMem2Db dst, double alpha, double beta, cudaStream_t stream);
@ -318,4 +323,8 @@ namespace cv { namespace gpu { namespace device
func(src, dst, alpha, beta, stream);
}
#if defined __clang__
# pragma clang diagnostic pop
#endif
}}} // namespace cv { namespace gpu { namespace device

View File

@ -1199,10 +1199,6 @@ namespace
void setTo(GpuMat& m, Scalar s, const GpuMat& mask) const
{
NppiSize sz;
sz.width = m.cols;
sz.height = m.rows;
if (mask.empty())
{
if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0)

View File

@ -42,6 +42,16 @@
#include "precomp.hpp"
#if !defined HAVE_TBB && !defined HAVE_OPENMP && !defined HAVE_GCD && !defined HAVE_CONCURRENCY
#ifdef __APPLE__
#define HAVE_GCD
#elif defined _MSC_VER && _MSC_VER >= 1600
#define HAVE_CONCURRENCY
#endif
#endif
#ifdef HAVE_CONCURRENCY
# include <ppl.h>
#elif defined HAVE_OPENMP
@ -106,7 +116,22 @@ namespace cv
#elif defined HAVE_CONCURRENCY
Concurrency::parallel_for(range.start, range.end, body);
class ConcurrencyProxyLoopBody
{
public:
ConcurrencyProxyLoopBody(const ParallelLoopBody& body) : _body(body) {}
void operator ()(int i) const
{
_body(Range(i, i + 1));
}
private:
const ParallelLoopBody& _body;
ConcurrencyProxyLoopBody& operator=(const ConcurrencyProxyLoopBody&) {return *this;}
} proxy(body);
Concurrency::parallel_for(range.start, range.end, proxy);
#elif defined HAVE_OPENMP

View File

@ -930,4 +930,104 @@ BOOL WINAPI DllMain( HINSTANCE, DWORD fdwReason, LPVOID )
}
#endif
namespace cv
{
#if defined WIN32 || defined _WIN32 || defined WINCE
struct Mutex::Impl
{
Impl() { InitializeCriticalSection(&cs); refcount = 1; }
~Impl() { DeleteCriticalSection(&cs); }
void lock() { EnterCriticalSection(&cs); }
bool trylock() { return TryEnterCriticalSection(&cs) != 0; }
void unlock() { LeaveCriticalSection(&cs); }
CRITICAL_SECTION cs;
int refcount;
};
#elif defined __APPLE__
#include <libkern/OSAtomic.h>
struct Mutex::Impl
{
Impl() { sl = OS_SPINLOCK_INIT; refcount = 1; }
~Impl() {}
void lock() { OSSpinLockLock(&sl); }
bool trylock() { return OSSpinLockTry(&sl); }
void unlock() { OSSpinLockUnlock(&sl); }
OSSpinLock sl;
int refcount;
};
#elif defined __linux__ && !defined ANDROID
struct Mutex::Impl
{
Impl() { pthread_spin_init(&sl, 0); refcount = 1; }
~Impl() { pthread_spin_destroy(&sl); }
void lock() { pthread_spin_lock(&sl); }
bool trylock() { return pthread_spin_trylock(&sl) == 0; }
void unlock() { pthread_spin_unlock(&sl); }
pthread_spinlock_t sl;
int refcount;
};
#else
struct Mutex::Impl
{
Impl() { pthread_mutex_init(&sl, 0); refcount = 1; }
~Impl() { pthread_mutex_destroy(&sl); }
void lock() { pthread_mutex_lock(&sl); }
bool trylock() { return pthread_mutex_trylock(&sl) == 0; }
void unlock() { pthread_mutex_unlock(&sl); }
pthread_mutex_t sl;
int refcount;
};
#endif
Mutex::Mutex()
{
impl = new Mutex::Impl;
}
Mutex::~Mutex()
{
if( CV_XADD(&impl->refcount, -1) == 1 )
delete impl;
impl = 0;
}
Mutex::Mutex(const Mutex& m)
{
impl = m.impl;
CV_XADD(&impl->refcount, 1);
}
Mutex& Mutex::operator = (const Mutex& m)
{
CV_XADD(&m.impl->refcount, 1);
if( CV_XADD(&impl->refcount, -1) == 1 )
delete impl;
impl = m.impl;
return *this;
}
void Mutex::lock() { impl->lock(); }
void Mutex::unlock() { impl->unlock(); }
bool Mutex::trylock() { return impl->trylock(); }
}
/* End of file. */

View File

@ -59,7 +59,7 @@ CV_INIT_ALGORITHM(BriefDescriptorExtractor, "Feature2D.BRIEF",
CV_INIT_ALGORITHM(FastFeatureDetector, "Feature2D.FAST",
obj.info()->addParam(obj, "threshold", obj.threshold);
obj.info()->addParam(obj, "nonmaxSuppression", obj.nonmaxSuppression);
obj.info()->addParam(obj, "type", obj.type, FastFeatureDetector::TYPE_9_16));
obj.info()->addParam(obj, "type", obj.type));
///////////////////////////////////////////////////////////////////////////////////////////////////////////
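The registration above stops pinning the "type" parameter to TYPE_9_16, and the test in the next file now passes the detector type to FAST() explicitly. A hedged sketch of that five-argument FAST overload (assumed from the updated test; the image path is a placeholder):

#include <opencv2/core/core.hpp>
#include <opencv2/features2d/features2d.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <vector>

void detectFastKeypoints()
{
    cv::Mat gray = cv::imread("board.png", 0); // placeholder path, loaded as grayscale (CV_8U)
    std::vector<cv::KeyPoint> keypoints;
    // threshold = 30, non-maximum suppression enabled, explicit 9/16 pattern
    cv::FAST(gray, keypoints, 30, true, cv::FastFeatureDetector::TYPE_9_16);
}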

View File

@ -75,8 +75,8 @@ void CV_FastTest::run( int )
vector<KeyPoint> keypoints1;
vector<KeyPoint> keypoints2;
FAST(gray1, keypoints1, 30, type);
FAST(gray2, keypoints2, 30, type);
FAST(gray1, keypoints1, 30, true, type);
FAST(gray2, keypoints2, 30, true, type);
for(size_t i = 0; i < keypoints1.size(); ++i)
{

View File

@ -200,7 +200,7 @@ int CV_KDTreeTest_CPP::checkGetPoins( const Mat& data )
int CV_KDTreeTest_CPP::checkFindBoxed()
{
vector<float> min( dims, minValue), max(dims, maxValue);
vector<float> min( dims, static_cast<float>(minValue)), max(dims, static_cast<float>(maxValue));
vector<int> indices;
tr->findOrthoRange( min, max, indices );
// TODO check indices
@ -214,8 +214,8 @@ int CV_KDTreeTest_CPP::findNeighbors( Mat& points, Mat& neighbors )
const int emax = 20;
Mat neighbors2( neighbors.size(), CV_32SC1 );
int j;
vector<float> min(points.cols, minValue);
vector<float> max(points.cols, maxValue);
vector<float> min(points.cols, static_cast<float>(minValue));
vector<float> max(points.cols, static_cast<float>(maxValue));
for( int pi = 0; pi < points.rows; pi++ )
{
// 1st way

View File

@ -54,7 +54,7 @@ static
Mat generateHomography(float angle)
{
// angle - rotation around Oz in degrees
float angleRadian = angle * CV_PI / 180.;
float angleRadian = static_cast<float>(angle * CV_PI / 180);
Mat H = Mat::eye(3, 3, CV_32FC1);
H.at<float>(0,0) = H.at<float>(1,1) = std::cos(angleRadian);
H.at<float>(0,1) = -std::sin(angleRadian);
@ -69,8 +69,8 @@ Mat rotateImage(const Mat& srcImage, float angle, Mat& dstImage, Mat& dstMask)
// angle - rotation around Oz in degrees
float diag = std::sqrt(static_cast<float>(srcImage.cols * srcImage.cols + srcImage.rows * srcImage.rows));
Mat LUShift = Mat::eye(3, 3, CV_32FC1); // left up
LUShift.at<float>(0,2) = -srcImage.cols/2;
LUShift.at<float>(1,2) = -srcImage.rows/2;
LUShift.at<float>(0,2) = static_cast<float>(-srcImage.cols/2);
LUShift.at<float>(1,2) = static_cast<float>(-srcImage.rows/2);
Mat RDShift = Mat::eye(3, 3, CV_32FC1); // right down
RDShift.at<float>(0,2) = diag/2;
RDShift.at<float>(1,2) = diag/2;
@ -114,7 +114,7 @@ void scaleKeyPoints(const vector<KeyPoint>& src, vector<KeyPoint>& dst, float sc
static
float calcCirclesIntersectArea(const Point2f& p0, float r0, const Point2f& p1, float r1)
{
float c = norm(p0 - p1), sqr_c = c * c;
float c = static_cast<float>(norm(p0 - p1)), sqr_c = c * c;
float sqr_r0 = r0 * r0;
float sqr_r1 = r1 * r1;
@ -125,7 +125,7 @@ float calcCirclesIntersectArea(const Point2f& p0, float r0, const Point2f& p1, f
float minR = std::min(r0, r1);
float maxR = std::max(r0, r1);
if(c + minR <= maxR)
return CV_PI * minR * minR;
return static_cast<float>(CV_PI * minR * minR);
float cos_halfA0 = (sqr_r0 + sqr_c - sqr_r1) / (2 * r0 * c);
float cos_halfA1 = (sqr_r1 + sqr_c - sqr_r0) / (2 * r1 * c);
@ -133,15 +133,15 @@ float calcCirclesIntersectArea(const Point2f& p0, float r0, const Point2f& p1, f
float A0 = 2 * acos(cos_halfA0);
float A1 = 2 * acos(cos_halfA1);
return 0.5 * sqr_r0 * (A0 - sin(A0)) +
0.5 * sqr_r1 * (A1 - sin(A1));
return 0.5f * sqr_r0 * (A0 - sin(A0)) +
0.5f * sqr_r1 * (A1 - sin(A1));
}
static
float calcIntersectRatio(const Point2f& p0, float r0, const Point2f& p1, float r1)
{
float intersectArea = calcCirclesIntersectArea(p0, r0, p1, r1);
float unionArea = CV_PI * (r0 * r0 + r1 * r1) - intersectArea;
float unionArea = static_cast<float>(CV_PI) * (r0 * r0 + r1 * r1) - intersectArea;
return intersectArea / unionArea;
}
@ -160,7 +160,7 @@ void matchKeyPoints(const vector<KeyPoint>& keypoints0, const Mat& H,
matches.clear();
vector<uchar> usedMask(keypoints1.size(), 0);
for(size_t i0 = 0; i0 < keypoints0.size(); i0++)
for(int i0 = 0; i0 < static_cast<int>(keypoints0.size()); i0++)
{
int nearestPointIndex = -1;
float maxIntersectRatio = 0.f;
@ -176,7 +176,7 @@ void matchKeyPoints(const vector<KeyPoint>& keypoints0, const Mat& H,
if(intersectRatio > maxIntersectRatio)
{
maxIntersectRatio = intersectRatio;
nearestPointIndex = i1;
nearestPointIndex = static_cast<int>(i1);
}
}
@ -222,7 +222,7 @@ protected:
const int maxAngle = 360, angleStep = 15;
for(int angle = 0; angle < maxAngle; angle += angleStep)
{
Mat H = rotateImage(image0, angle, image1, mask1);
Mat H = rotateImage(image0, static_cast<float>(angle), image1, mask1);
vector<KeyPoint> keypoints1;
featureDetector->detect(image1, keypoints1, mask1);
@ -339,10 +339,10 @@ protected:
const int maxAngle = 360, angleStep = 15;
for(int angle = 0; angle < maxAngle; angle += angleStep)
{
Mat H = rotateImage(image0, angle, image1, mask1);
Mat H = rotateImage(image0, static_cast<float>(angle), image1, mask1);
vector<KeyPoint> keypoints1;
rotateKeyPoints(keypoints0, H, angle, keypoints1);
rotateKeyPoints(keypoints0, H, static_cast<float>(angle), keypoints1);
Mat descriptors1;
descriptorExtractor->compute(image1, keypoints1, descriptors1);
@ -457,7 +457,7 @@ protected:
keyPointMatchesCount++;
// Check does this inlier have consistent sizes
const float maxSizeDiff = 0.8;//0.9f; // grad
const float maxSizeDiff = 0.8f;//0.9f; // grad
float size0 = keypoints0[matches[m].trainIdx].size;
float size1 = osiKeypoints1[matches[m].queryIdx].size;
CV_Assert(size0 > 0 && size1 > 0);
@ -545,7 +545,7 @@ protected:
resize(image0, image1, Size(), 1./scale, 1./scale);
vector<KeyPoint> keypoints1;
scaleKeyPoints(keypoints0, keypoints1, 1./scale);
scaleKeyPoints(keypoints0, keypoints1, 1.0f/scale);
Mat descriptors1;
descriptorExtractor->compute(image1, keypoints1, descriptors1);

View File

@ -111,43 +111,3 @@ ocv_add_accuracy_tests(FILES "Include" ${test_hdrs}
FILES "Src" ${test_srcs}
${nvidia})
ocv_add_perf_tests()
set(perf_cpu_path "${CMAKE_CURRENT_SOURCE_DIR}/perf_cpu")
if(BUILD_PERF_TESTS AND EXISTS "${perf_cpu_path}")
# opencv_highgui is required for imread/imwrite
set(perf_deps ${the_module} opencv_ts opencv_highgui opencv_imgproc opencv_calib3d opencv_objdetect opencv_video opencv_nonfree)
ocv_check_dependencies(${perf_deps})
if(OCV_DEPENDENCIES_FOUND)
set(the_target "opencv_perf_gpu_cpu")
ocv_module_include_directories(${perf_deps} "${perf_cpu_path}")
if(NOT OPENCV_PERF_${the_module}_CPU_SOURCES)
file(GLOB perf_srcs "${perf_cpu_path}/*.cpp")
file(GLOB perf_hdrs "${perf_cpu_path}/*.hpp" "${perf_cpu_path}/*.h")
source_group("Src" FILES ${perf_srcs})
source_group("Include" FILES ${perf_hdrs})
set(OPENCV_PERF_${the_module}_CPU_SOURCES ${perf_srcs} ${perf_hdrs})
endif()
add_executable(${the_target} ${OPENCV_PERF_${the_module}_CPU_SOURCES})
target_link_libraries(${the_target} ${OPENCV_MODULE_${the_module}_DEPS} ${perf_deps} ${OPENCV_LINKER_LIBS})
# Additional target properties
set_target_properties(${the_target} PROPERTIES
DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
RUNTIME_OUTPUT_DIRECTORY "${EXECUTABLE_OUTPUT_PATH}"
)
if(ENABLE_SOLUTION_FOLDERS)
set_target_properties(${the_target} PROPERTIES FOLDER "tests performance")
endif()
ocv_add_precompiled_headers(${the_target})
else(OCV_DEPENDENCIES_FOUND)
#TODO: warn about unsatisfied dependencies
endif(OCV_DEPENDENCIES_FOUND)
endif()

View File

@ -204,7 +204,7 @@ gpu::CascadeClassifier_GPU
--------------------------
.. ocv:class:: gpu::CascadeClassifier_GPU
Cascade classifier class used for object detection. ::
Cascade classifier class used for object detection. Supports HAAR and LBP cascades. ::
class CV_EXPORTS CascadeClassifier_GPU
{
@ -219,6 +219,7 @@ Cascade classifier class used for object detection. ::
/* Returns number of detected objects */
int detectMultiScale( const GpuMat& image, GpuMat& objectsBuf, double scaleFactor=1.2, int minNeighbors=4, Size minSize=Size());
int detectMultiScale( const GpuMat& image, GpuMat& objectsBuf, Size maxObjectSize, Size minSize = Size(), double scaleFactor = 1.1, int minNeighbors = 4);
/* Finds only the largest object. Special mode if training is required.*/
bool findLargestObject;
@ -233,11 +234,11 @@ Cascade classifier class used for object detection. ::
gpu::CascadeClassifier_GPU::CascadeClassifier_GPU
-----------------------------------------------------
Loads the classifier from a file.
Loads the classifier from a file. The cascade type is detected automatically from the constructor parameter.
.. ocv:function:: gpu::CascadeClassifier_GPU::CascadeClassifier_GPU(const string& filename)
:param filename: Name of the file from which the classifier is loaded. Only the old ``haar`` classifier (trained by the ``haar`` training application) and NVIDIA's ``nvbin`` are supported.
:param filename: Name of the file from which the classifier is loaded. Only the old ``haar`` classifier (trained by the ``haar`` training application) and NVIDIA's ``nvbin`` are supported for HAAR; only the new type of OpenCV XML cascade is supported for LBP.
@ -255,8 +256,7 @@ Loads the classifier from a file. The previous content is destroyed.
.. ocv:function:: bool gpu::CascadeClassifier_GPU::load(const string& filename)
:param filename: Name of the file from which the classifier is loaded. Only the old ``haar`` classifier (trained by the ``haar`` training application) and NVIDIA's ``nvbin`` are supported.
:param filename: Name of the file from which the classifier is loaded. Only the old ``haar`` classifier (trained by the ``haar`` training application) and NVIDIA's ``nvbin`` are supported for HAAR; only the new type of OpenCV XML cascade is supported for LBP.
gpu::CascadeClassifier_GPU::release
@ -273,13 +273,17 @@ Detects objects of different sizes in the input image.
.. ocv:function:: int gpu::CascadeClassifier_GPU::detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, double scaleFactor=1.2, int minNeighbors=4, Size minSize=Size())
.. ocv:function:: int gpu::CascadeClassifier_GPU::detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, Size maxObjectSize, Size minSize = Size(), double scaleFactor = 1.1, int minNeighbors = 4)
:param image: Matrix of type ``CV_8U`` containing an image where objects should be detected.
:param objectsBuf: Buffer to store detected objects (rectangles). If it is empty, it is allocated with the default size. If not empty, the function searches for no more than N objects, where ``N = sizeof(objectsBuffer's data)/sizeof(cv::Rect)``.
:param scaleFactor: Value to specify how much the image size is reduced at each image scale.
:param maxObjectSize: Maximum possible object size. Objects larger than that are ignored. Used only by the second signature and supported only for LBP cascades.
:param minNeighbors: Value to specify how many neighbours each candidate rectangle has to retain.
:param scaleFactor: Parameter specifying how much the image size is reduced at each image scale.
:param minNeighbors: Parameter specifying how many neighbors each candidate rectangle should have to retain it.
:param minSize: Minimum possible object size. Objects smaller than that are ignored.
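A minimal usage sketch of the interface documented above (the cascade file name, the input image and the result handling are illustrative assumptions, not part of the documented API):
#include <opencv2/gpu/gpu.hpp>
#include <opencv2/highgui/highgui.hpp>
cv::Mat grayImage = cv::imread("image.png", 0);   // placeholder input, loaded as grayscale
cv::gpu::CascadeClassifier_GPU cascade("haarcascade_frontalface_alt.xml");
cv::gpu::GpuMat d_img(grayImage);
cv::gpu::GpuMat objectsBuf;
int n = cascade.detectMultiScale(d_img, objectsBuf, 1.2, 4);
cv::Mat objectsHost;
if (n > 0)
{
    objectsBuf.colRange(0, n).download(objectsHost);   // the first n entries hold cv::Rect results
    const cv::Rect* objects = objectsHost.ptr<cv::Rect>();
    // ... use objects[0..n-1] ...
}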

View File

@ -653,7 +653,7 @@ gpu::GMG_GPU
------------
.. ocv:class:: gpu::GMG_GPU
Class used for background/foreground segmentation. ::
Class used for background/foreground segmentation. ::
class GMG_GPU_GPU
{
@ -677,9 +677,9 @@ Class used for background/foreground segmentation. ::
...
};
The class discriminates between foreground and background pixels by building and maintaining a model of the background. Any pixel which does not fit this model is then deemed to be foreground. The class implements the algorithm described in [GMG2012]_.
The class discriminates between foreground and background pixels by building and maintaining a model of the background. Any pixel which does not fit this model is then deemed to be foreground. The class implements the algorithm described in [GMG2012]_.
Here are important members of the class that control the algorithm, which you can set after constructing the class instance:
Here are important members of the class that control the algorithm, which you can set after constructing the class instance:
.. ocv:member:: int maxFeatures
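A minimal per-frame usage sketch (the ``initialize``/``operator()`` calls and their signatures are assumptions taken from the 2.4 gpu module; they are not shown in this excerpt of the class listing):
cv::gpu::GMG_GPU gmg;
gmg.maxFeatures = 64;                  // member documented above
gmg.initialize(firstFrame.size());     // firstFrame: a host cv::Mat defining the video resolution
cv::gpu::GpuMat d_frame, d_fgmask;
cv::Mat frame;
for (;;)
{
    // ... grab the next host frame into 'frame' (e.g. from cv::VideoCapture) ...
    d_frame.upload(frame);
    gmg(d_frame, d_fgmask);            // per-pixel foreground mask computed on the GPU
}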

View File

@ -820,6 +820,7 @@ private:
int nLayers_;
};
//! HoughLines
CV_EXPORTS void HoughLines(const GpuMat& src, GpuMat& lines, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096);
CV_EXPORTS void HoughLines(const GpuMat& src, GpuMat& lines, GpuMat& accum, GpuMat& buf, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096);
CV_EXPORTS void HoughLinesTransform(const GpuMat& src, GpuMat& accum, GpuMat& buf, float rho, float theta);
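A hedged usage sketch for the first HoughLines overload above (the edge-map input and the (rho, theta) layout of the downloaded buffer are assumptions, not guaranteed by these declarations):
cv::gpu::GpuMat d_edges(edges);       // edges: binary CV_8UC1 edge map, e.g. produced by Canny
cv::gpu::GpuMat d_lines;
cv::gpu::HoughLines(d_edges, d_lines, 1.0f, static_cast<float>(CV_PI / 180.0), 100);
cv::Mat h_lines;
d_lines.download(h_lines);            // each detected line is assumed to be stored as a (rho, theta) pair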

View File

@ -1,255 +1,234 @@
import sys, re
spaces = '[\s]*'
symbols = '[\s\w\d,.=:|]*'
symbols = '[\s\w\d,.:|]*'
def pattern1(prefix, test):
return re.compile(spaces + 'perf::' + prefix + '/' + test + '::' + '\(' + symbols + '\)' + spaces)
return re.compile(spaces + prefix + '_' + test + '::' + symbols + '::' + '\(' + symbols + '\)' + spaces)
def pattern2(prefix, test, cvtype):
return re.compile(spaces + 'perf::' + prefix + '/' + test + '::' + '\(' + symbols + cvtype + symbols + '\)' + spaces)
def pattern2(prefix, test, param1):
return re.compile(spaces + prefix + '_' + test + '::' + symbols + '::' + '\(' + symbols + param1 + symbols + '\)' + spaces)
def pattern3(prefix, test, cvtype, param1):
return re.compile(spaces + 'perf::' + prefix + '/' + test + '::' + '\(' + symbols + cvtype + symbols + param1 + symbols + '\)' + spaces)
def pattern3(prefix, test, param1, param2):
return re.compile(spaces + prefix + '_' + test + '::' + symbols + '::' + '\(' + symbols + param1 + symbols + param2 + symbols + '\)' + spaces)
def pattern4(prefix, test, cvtype, param1, param2):
return re.compile(spaces + 'perf::' + prefix + '/' + test + '::' + '\(' + symbols + cvtype + symbols + param1 + symbols + param2 + symbols + '\)' + spaces)
def pattern4(prefix, test, param1, param2, param3):
return re.compile(spaces + prefix + '_' + test + '::' + symbols + '::' + '\(' + symbols + param1 + symbols + param2 + symbols + param3 + symbols + '\)' + spaces)
def pattern5(prefix, test, param1, param2, param3, param4):
return re.compile(spaces + prefix + '_' + test + '::' + symbols + '::' + '\(' + symbols + param1 + symbols + param2 + symbols + param3 + symbols + param4 + symbols + '\)' + spaces)
npp_patterns = [
##############################################################
# Core
# Core/Add_Mat (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'Add_Mat', '8U'),
pattern2('Core', 'Add_Mat', '16U'),
pattern2('Core', 'Add_Mat', '32F'),
# Core_AddMat (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'AddMat', '8U'),
pattern2('Core', 'AddMat', '16U'),
pattern2('Core', 'AddMat', '32F'),
# Core/Add_Scalar (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'Add_Scalar', '8U'),
pattern2('Core', 'Add_Scalar', '16U'),
pattern2('Core', 'Add_Scalar', '32F'),
# Core_AddScalar (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'AddScalar', '8U'),
pattern2('Core', 'AddScalar', '16U'),
pattern2('Core', 'AddScalar', '32F'),
# Core/Subtract_Mat (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'Subtract_Mat', '8U'),
pattern2('Core', 'Subtract_Mat', '16U'),
pattern2('Core', 'Subtract_Mat', '32F'),
# Core_SubtractMat (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'SubtractMat', '8U'),
pattern2('Core', 'SubtractMat', '16U'),
pattern2('Core', 'SubtractMat', '32F'),
# Core/Subtract_Scalar (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'Subtract_Scalar', '8U'),
pattern2('Core', 'Subtract_Scalar', '16U'),
pattern2('Core', 'Subtract_Scalar', '32F'),
# Core_SubtractScalar (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'SubtractScalar', '8U'),
pattern2('Core', 'SubtractScalar', '16U'),
pattern2('Core', 'SubtractScalar', '32F'),
# Core/Multiply_Mat (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'Multiply_Mat', '8U'),
pattern2('Core', 'Multiply_Mat', '16U'),
pattern2('Core', 'Multiply_Mat', '32F'),
# Core_MultiplyMat (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'MultiplyMat', '8U'),
pattern2('Core', 'MultiplyMat', '16U'),
pattern2('Core', 'MultiplyMat', '32F'),
# Core/Multiply_Scalar (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'Multiply_Scalar', '8U'),
pattern2('Core', 'Multiply_Scalar', '16U'),
pattern2('Core', 'Multiply_Scalar', '32F'),
# Core_MultiplyScalar (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'MultiplyScalar', '8U'),
pattern2('Core', 'MultiplyScalar', '16U'),
pattern2('Core', 'MultiplyScalar', '32F'),
# Core/Divide_Mat (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'Divide_Mat', '8U'),
pattern2('Core', 'Divide_Mat', '16U'),
pattern2('Core', 'Divide_Mat', '32F'),
# Core_DivideMat (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'DivideMat', '8U'),
pattern2('Core', 'DivideMat', '16U'),
pattern2('Core', 'DivideMat', '32F'),
# Core/Divide_Scalar (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'Divide_Scalar', '8U'),
pattern2('Core', 'Divide_Scalar', '16U'),
pattern2('Core', 'Divide_Scalar', '32F'),
# Core_Divide_Scalar (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'DivideScalar', '8U'),
pattern2('Core', 'DivideScalar', '16U'),
pattern2('Core', 'DivideScalar', '32F'),
# Core/AbsDiff_Mat (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'AbsDiff_Mat', '8U'),
pattern2('Core', 'AbsDiff_Mat', '16U'),
pattern2('Core', 'AbsDiff_Mat', '32F'),
# Core_AbsDiff_Mat (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'AbsDiffMat', '8U'),
pattern2('Core', 'AbsDiffMat', '16U'),
pattern2('Core', 'AbsDiffMat', '32F'),
# Core/AbsDiff_Scalar (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'AbsDiff_Scalar', '8U'),
pattern2('Core', 'AbsDiff_Scalar', '16U'),
pattern2('Core', 'AbsDiff_Scalar', '32F'),
# Core_AbsDiffScalar (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'AbsDiffScalar', '8U'),
pattern2('Core', 'AbsDiffScalar', '16U'),
pattern2('Core', 'AbsDiffScalar', '32F'),
# Core/Abs
# Core_Abs
pattern1('Core', 'Abs'),
# Core/Sqr
# Core_Sqr
pattern1('Core', 'Sqr'),
# Core/Sqrt
# Core_Sqrt
pattern1('Core', 'Sqrt'),
# Core/Log
# Core_Log
pattern1('Core', 'Log'),
# Core/Exp
# Core_Exp
pattern1('Core', 'Exp'),
# Core/Bitwise_And_Scalar
pattern1('Core', 'Bitwise_And_Scalar'),
# Core_BitwiseAndScalar
pattern1('Core', 'BitwiseAndScalar'),
# Core/Bitwise_Or_Scalar
pattern1('Core', 'Bitwise_Or_Scalar'),
# Core_BitwiseOrScalar
pattern1('Core', 'BitwiseOrScalar'),
# Core/Bitwise_Xor_Scalar
pattern1('Core', 'Bitwise_Xor_Scalar'),
# Core_BitwiseXorScalar
pattern1('Core', 'BitwiseXorScalar'),
# Core/RShift
# Core_RShift
pattern1('Core', 'RShift'),
# Core/LShift
# Core_LShift
pattern1('Core', 'LShift'),
# Core/Transpose
# Core_Transpose
pattern1('Core', 'Transpose'),
# Core/Flip
# Core_Flip
pattern1('Core', 'Flip'),
# Core/LUT_OneChannel
pattern1('Core', 'LUT_OneChannel'),
# Core_LutOneChannel
pattern1('Core', 'LutOneChannel'),
# Core/LUT_MultiChannel
pattern1('Core', 'LUT_MultiChannel'),
# Core_LutMultiChannel
pattern1('Core', 'LutMultiChannel'),
# Core/Magnitude_Complex
pattern1('Core', 'Magnitude_Complex'),
# Core_MagnitudeComplex
pattern1('Core', 'MagnitudeComplex'),
# Core/Magnitude_Sqr_Complex
pattern1('Core', 'Magnitude_Sqr_Complex'),
# Core_MagnitudeSqrComplex
pattern1('Core', 'MagnitudeSqrComplex'),
# Core/MeanStdDev
# Core_MeanStdDev
pattern1('Core', 'MeanStdDev'),
# Core/NormDiff
# Core_NormDiff
pattern1('Core', 'NormDiff'),
##############################################################
# Filters
# Filters/Blur
# Filters_Blur
pattern1('Filters', 'Blur'),
# Filters/Erode
# Filters_Erode
pattern1('Filters', 'Erode'),
# Filters/Dilate
# Filters_Dilate
pattern1('Filters', 'Dilate'),
# Filters/MorphologyEx
# Filters_MorphologyEx
pattern1('Filters', 'MorphologyEx'),
##############################################################
# ImgProc
# ImgProc/Resize (8UC1 | 8UC4, INTER_NEAREST | INTER_LINEAR)
pattern3('ImgProc', 'Resize', '8UC1', 'INTER_NEAREST'),
pattern3('ImgProc', 'Resize', '8UC4', 'INTER_NEAREST'),
pattern3('ImgProc', 'Resize', '8UC1', 'INTER_LINEAR'),
pattern3('ImgProc', 'Resize', '8UC4', 'INTER_LINEAR'),
# ImgProc_Resize (8U, 1 | 4, INTER_NEAREST | INTER_LINEAR)
pattern4('ImgProc', 'Resize', '8U', '1', 'INTER_NEAREST'),
pattern4('ImgProc', 'Resize', '8U', '4', 'INTER_NEAREST'),
pattern4('ImgProc', 'Resize', '8U', '1', 'INTER_LINEAR'),
pattern4('ImgProc', 'Resize', '8U', '4', 'INTER_LINEAR'),
# ImgProc/Resize (8UC4, INTER_CUBIC)
pattern3('ImgProc', 'Resize', '8UC4', 'INTER_CUBIC'),
# ImgProc_Resize (8U, 4, INTER_CUBIC)
pattern4('ImgProc', 'Resize', '8U', '4', 'INTER_CUBIC'),
# ImgProc/WarpAffine (8UC1 | 8UC3 | 8UC4 | 32FC1 | 32FC3 | 32FC4, INTER_NEAREST | INTER_LINEAR | INTER_CUBIC, BORDER_CONSTANT)
pattern4('ImgProc', 'WarpAffine', '8UC1', 'INTER_NEAREST', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '8UC1', 'INTER_LINEAR', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '8UC1', 'INTER_CUBIC', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '8UC3', 'INTER_NEAREST', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '8UC3', 'INTER_LINEAR', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '8UC3', 'INTER_CUBIC', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '8UC4', 'INTER_NEAREST', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '8UC4', 'INTER_LINEAR', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '8UC4', 'INTER_CUBIC', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '32FC1', 'INTER_NEAREST', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '32FC1', 'INTER_LINEAR', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '32FC1', 'INTER_CUBIC', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '32FC3', 'INTER_NEAREST', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '32FC3', 'INTER_LINEAR', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '32FC3', 'INTER_CUBIC', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '32FC4', 'INTER_NEAREST', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '32FC4', 'INTER_LINEAR', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '32FC4', 'INTER_CUBIC', 'BORDER_CONSTANT'),
# ImgProc_WarpAffine (8U | 32F, INTER_NEAREST | INTER_LINEAR | INTER_CUBIC, BORDER_CONSTANT)
pattern4('ImgProc', 'WarpAffine', '8U' , 'INTER_NEAREST', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '8U' , 'INTER_LINEAR', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '8U' , 'INTER_CUBIC', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '32F', 'INTER_NEAREST', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '32F', 'INTER_LINEAR', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '32F', 'INTER_CUBIC', 'BORDER_CONSTANT'),
# ImgProc/WarpPerspective (8UC1 | 8UC3 | 8UC4 | 32FC1 | 32FC3 | 32FC4, INTER_NEAREST | INTER_LINEAR | INTER_CUBIC, BORDER_CONSTANT)
pattern4('ImgProc', 'WarpPerspective', '8UC1', 'INTER_NEAREST', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '8UC1', 'INTER_LINEAR', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '8UC1', 'INTER_CUBIC', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '8UC3', 'INTER_NEAREST', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '8UC3', 'INTER_LINEAR', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '8UC3', 'INTER_CUBIC', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '8UC4', 'INTER_NEAREST', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '8UC4', 'INTER_LINEAR', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '8UC4', 'INTER_CUBIC', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '32FC1', 'INTER_NEAREST', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '32FC1', 'INTER_LINEAR', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '32FC1', 'INTER_CUBIC', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '32FC3', 'INTER_NEAREST', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '32FC3', 'INTER_LINEAR', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '32FC3', 'INTER_CUBIC', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '32FC4', 'INTER_NEAREST', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '32FC4', 'INTER_LINEAR', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '32FC4', 'INTER_CUBIC', 'BORDER_CONSTANT'),
# ImgProc_WarpPerspective (8U | 32F, INTER_NEAREST | INTER_LINEAR | INTER_CUBIC, BORDER_CONSTANT)
pattern4('ImgProc', 'WarpPerspective', '8U' , 'INTER_NEAREST', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '8U' , 'INTER_LINEAR', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '8U' , 'INTER_CUBIC', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '32F', 'INTER_NEAREST', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '32F', 'INTER_LINEAR', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '32F', 'INTER_CUBIC', 'BORDER_CONSTANT'),
# ImgProc/CopyMakeBorder (8UC1 | 8UC4 | 32SC1 | 32FC1, BORDER_CONSTANT)
pattern3('ImgProc', 'CopyMakeBorder', '8UC1', 'BORDER_CONSTANT'),
pattern3('ImgProc', 'CopyMakeBorder', '8UC4', 'BORDER_CONSTANT'),
pattern3('ImgProc', 'CopyMakeBorder', '32SC1', 'BORDER_CONSTANT'),
pattern3('ImgProc', 'CopyMakeBorder', '32FC1', 'BORDER_CONSTANT'),
# ImgProc_CopyMakeBorder (8UC1 | 8UC4 | 32SC1 | 32FC1, BORDER_CONSTANT)
pattern4('ImgProc', 'CopyMakeBorder', '8U' , '1', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'CopyMakeBorder', '8U' , '4', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'CopyMakeBorder', '32S', '1', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'CopyMakeBorder', '32F', '1', 'BORDER_CONSTANT'),
# ImgProc/Threshold (32F, THRESH_TRUNC)
# ImgProc_Threshold (32F, THRESH_TRUNC)
pattern3('ImgProc', 'Threshold', '32F', 'THRESH_TRUNC'),
# ImgProc/Integral_Sqr
pattern1('ImgProc', 'Integral_Sqr'),
# ImgProc_IntegralSqr
pattern1('ImgProc', 'IntegralSqr'),
# ImgProc/HistEven_OneChannel
pattern1('ImgProc', 'HistEven_OneChannel'),
# ImgProc_HistEven_OneChannel
pattern1('ImgProc', 'HistEvenOneChannel'),
# ImgProc/HistEven_FourChannel
pattern1('ImgProc', 'HistEven_FourChannel'),
# ImgProc_HistEven_FourChannel
pattern1('ImgProc', 'HistEvenFourChannel'),
# ImgProc/Rotate
# ImgProc_Rotate
pattern1('ImgProc', 'Rotate'),
# ImgProc/SwapChannels
# ImgProc_SwapChannels
pattern1('ImgProc', 'SwapChannels'),
# ImgProc/AlphaComp
# ImgProc_AlphaComp
pattern1('ImgProc', 'AlphaComp'),
# ImgProc/ImagePyramid_build
pattern1('ImgProc', 'ImagePyramid_build'),
# ImgProc_ImagePyramidBuild
pattern1('ImgProc', 'ImagePyramidBuild'),
# ImgProc/ImagePyramid_getLayer
pattern1('ImgProc', 'ImagePyramid_getLayer'),
# ImgProc_ImagePyramid_getLayer
pattern1('ImgProc', 'ImagePyramidGetLayer'),
##############################################################
# MatOp
# MatOp/SetTo (8UC4 | 16UC1 | 16UC4 | 32FC1 | 32FC4)
pattern2('MatOp', 'SetTo', '8UC4'),
pattern2('MatOp', 'SetTo', '16UC1'),
pattern2('MatOp', 'SetTo', '16UC4'),
pattern2('MatOp', 'SetTo', '32FC1'),
pattern2('MatOp', 'SetTo', '32FC4'),
# MatOp_SetTo (8UC4 | 16UC1 | 16UC4 | 32FC1 | 32FC4)
pattern3('MatOp', 'SetTo', '8U' , '4'),
pattern3('MatOp', 'SetTo', '16U', '1'),
pattern3('MatOp', 'SetTo', '16U', '4'),
pattern3('MatOp', 'SetTo', '32F', '1'),
pattern3('MatOp', 'SetTo', '32F', '4'),
# MatOp/SetToMasked (8UC4 | 16UC1 | 16UC4 | 32FC1 | 32FC4)
pattern2('MatOp', 'SetToMasked', '8UC4'),
pattern2('MatOp', 'SetToMasked', '16UC1'),
pattern2('MatOp', 'SetToMasked', '16UC4'),
pattern2('MatOp', 'SetToMasked', '32FC1'),
pattern2('MatOp', 'SetToMasked', '32FC4'),
# MatOp_SetToMasked (8UC4 | 16UC1 | 16UC4 | 32FC1 | 32FC4)
pattern3('MatOp', 'SetToMasked', '8U' , '4'),
pattern3('MatOp', 'SetToMasked', '16U', '1'),
pattern3('MatOp', 'SetToMasked', '16U', '4'),
pattern3('MatOp', 'SetToMasked', '32F', '1'),
pattern3('MatOp', 'SetToMasked', '32F', '4'),
# MatOp/CopyToMasked (8UC1 | 8UC3 |8UC4 | 16UC1 | 16UC3 | 16UC4 | 32FC1 | 32FC3 | 32FC4)
pattern2('MatOp', 'CopyToMasked', '8UC1'),
pattern2('MatOp', 'CopyToMasked', '8UC3'),
pattern2('MatOp', 'CopyToMasked', '8UC4'),
pattern2('MatOp', 'CopyToMasked', '16UC1'),
pattern2('MatOp', 'CopyToMasked', '16UC3'),
pattern2('MatOp', 'CopyToMasked', '16UC4'),
pattern2('MatOp', 'CopyToMasked', '32FC1'),
pattern2('MatOp', 'CopyToMasked', '32FC3'),
pattern2('MatOp', 'CopyToMasked', '32FC4'),
# MatOp_CopyToMasked (8UC1 | 8UC3 |8UC4 | 16UC1 | 16UC3 | 16UC4 | 32FC1 | 32FC3 | 32FC4)
pattern3('MatOp', 'CopyToMasked', '8U' , '1'),
pattern3('MatOp', 'CopyToMasked', '8U' , '3'),
pattern3('MatOp', 'CopyToMasked', '8U' , '4'),
pattern3('MatOp', 'CopyToMasked', '16U', '1'),
pattern3('MatOp', 'CopyToMasked', '16U', '3'),
pattern3('MatOp', 'CopyToMasked', '16U', '4'),
pattern3('MatOp', 'CopyToMasked', '32F', '1'),
pattern3('MatOp', 'CopyToMasked', '32F', '3'),
pattern3('MatOp', 'CopyToMasked', '32F', '4'),
]
cublasPattern = pattern1('Core', 'GEMM')

modules/gpu/perf/main.cpp Normal file
View File

@ -0,0 +1,125 @@
#include "perf_precomp.hpp"
using namespace std;
using namespace cv;
using namespace cv::gpu;
using namespace cvtest;
using namespace testing;
void printOsInfo()
{
#if defined _WIN32
# if defined _WIN64
cout << "OS: Windows x64 \n" << endl;
# else
cout << "OS: Windows x32 \n" << endl;
# endif
#elif defined linux
# if defined _LP64
cout << "OS: Linux x64 \n" << endl;
# else
cout << "OS: Linux x32 \n" << endl;
# endif
#elif defined __APPLE__
# if defined _LP64
cout << "OS: Apple x64 \n" << endl;
# else
cout << "OS: Apple x32 \n" << endl;
# endif
#endif
}
void printCudaInfo()
{
#ifndef HAVE_CUDA
cout << "OpenCV was built without CUDA support \n" << endl;
#else
int driver;
cudaDriverGetVersion(&driver);
cout << "CUDA Driver version: " << driver << '\n';
cout << "CUDA Runtime version: " << CUDART_VERSION << '\n';
cout << endl;
cout << "GPU module was compiled for the following GPU archs:" << endl;
cout << " BIN: " << CUDA_ARCH_BIN << '\n';
cout << " PTX: " << CUDA_ARCH_PTX << '\n';
cout << endl;
int deviceCount = getCudaEnabledDeviceCount();
cout << "CUDA device count: " << deviceCount << '\n';
cout << endl;
for (int i = 0; i < deviceCount; ++i)
{
DeviceInfo info(i);
cout << "Device [" << i << "] \n";
cout << "\t Name: " << info.name() << '\n';
cout << "\t Compute capability: " << info.majorVersion() << '.' << info.minorVersion()<< '\n';
cout << "\t Multi Processor Count: " << info.multiProcessorCount() << '\n';
cout << "\t Total memory: " << static_cast<int>(static_cast<int>(info.totalMemory() / 1024.0) / 1024.0) << " Mb \n";
cout << "\t Free memory: " << static_cast<int>(static_cast<int>(info.freeMemory() / 1024.0) / 1024.0) << " Mb \n";
if (!info.isCompatible())
cout << "\t !!! This device is NOT compatible with current GPU module build \n";
cout << endl;
}
#endif
}
int main(int argc, char** argv)
{
CommandLineParser cmd(argc, (const char**) argv,
"{ print_info_only | print_info_only | false | Print information about system and exit }"
"{ device | device | 0 | Device on which tests will be executed }"
"{ cpu | cpu | false | Run tests on cpu }"
);
printOsInfo();
printCudaInfo();
if (cmd.get<bool>("print_info_only"))
return 0;
int device = cmd.get<int>("device");
bool cpu = cmd.get<bool>("cpu");
#ifndef HAVE_CUDA
cpu = true;
#endif
if (cpu)
{
runOnGpu = false;
cout << "Run tests on CPU \n" << endl;
}
else
{
runOnGpu = true;
if (device < 0 || device >= getCudaEnabledDeviceCount())
{
cerr << "Incorrect device index - " << device << endl;
return -1;
}
DeviceInfo info(device);
if (!info.isCompatible())
{
cerr << "Device " << device << " [" << info.name() << "] is NOT compatible with current GPU module build" << endl;
return -1;
}
setDevice(device);
cout << "Run tests on device " << device << " [" << info.name() << "] \n" << endl;
}
InitGoogleTest(&argc, argv);
perf::TestBase::Init(argc, argv);
return RUN_ALL_TESTS();
}
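For reference, a hypothetical invocation of the resulting binary (the executable name and the exact --key=value syntax are assumptions; they depend on the build and on how this version of CommandLineParser parses arguments):
// Hypothetical command lines, matching the parser keys defined above:
//   opencv_perf_gpu --device=1              run the GPU code paths on CUDA device 1
//   opencv_perf_gpu --cpu=true              run the CPU reference code paths instead
//   opencv_perf_gpu --print_info_only=true  print OS/CUDA information and exit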

View File

@ -1,219 +1,263 @@
#include "perf_precomp.hpp"
#ifdef HAVE_CUDA
using namespace std;
using namespace testing;
namespace {
//////////////////////////////////////////////////////////////////////
// StereoBM
GPU_PERF_TEST_1(StereoBM, cv::gpu::DeviceInfo)
typedef pair<string, string> pair_string;
DEF_PARAM_TEST_1(ImagePair, pair_string);
PERF_TEST_P(ImagePair, Calib3D_StereoBM, Values(make_pair<string, string>("gpu/perf/aloe.jpg", "gpu/perf/aloeR.jpg")))
{
cv::gpu::DeviceInfo devInfo = GetParam();
cv::gpu::setDevice(devInfo.deviceID());
cv::Mat img_l_host = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img_l_host.empty());
cv::Mat img_r_host = readImage("gpu/perf/aloeR.jpg", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img_r_host.empty());
cv::gpu::StereoBM_GPU bm(0, 256);
cv::gpu::GpuMat img_l(img_l_host);
cv::gpu::GpuMat img_r(img_r_host);
cv::gpu::GpuMat dst;
bm(img_l, img_r, dst);
declare.time(5.0);
const cv::Mat imgLeft = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(imgLeft.empty());
const cv::Mat imgRight = readImage(GetParam().second, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(imgRight.empty());
const int preset = 0;
const int ndisp = 256;
if (runOnGpu)
{
cv::gpu::StereoBM_GPU d_bm(preset, ndisp);
cv::gpu::GpuMat d_imgLeft(imgLeft);
cv::gpu::GpuMat d_imgRight(imgRight);
cv::gpu::GpuMat d_dst;
d_bm(d_imgLeft, d_imgRight, d_dst);
TEST_CYCLE()
{
bm(img_l, img_r, dst);
d_bm(d_imgLeft, d_imgRight, d_dst);
}
}
else
{
cv::StereoBM bm(preset, ndisp);
cv::Mat dst;
bm(imgLeft, imgRight, dst);
TEST_CYCLE()
{
bm(imgLeft, imgRight, dst);
}
}
}
INSTANTIATE_TEST_CASE_P(Calib3D, StereoBM, ALL_DEVICES);
//////////////////////////////////////////////////////////////////////
// StereoBeliefPropagation
GPU_PERF_TEST_1(StereoBeliefPropagation, cv::gpu::DeviceInfo)
PERF_TEST_P(ImagePair, Calib3D_StereoBeliefPropagation, Values(make_pair<string, string>("gpu/stereobp/aloe-L.png", "gpu/stereobp/aloe-R.png")))
{
cv::gpu::DeviceInfo devInfo = GetParam();
cv::gpu::setDevice(devInfo.deviceID());
cv::Mat img_l_host = readImage("gpu/stereobp/aloe-L.png");
ASSERT_FALSE(img_l_host.empty());
cv::Mat img_r_host = readImage("gpu/stereobp/aloe-R.png");
ASSERT_FALSE(img_r_host.empty());
cv::gpu::StereoBeliefPropagation bp(64);
cv::gpu::GpuMat img_l(img_l_host);
cv::gpu::GpuMat img_r(img_r_host);
cv::gpu::GpuMat dst;
bp(img_l, img_r, dst);
declare.time(10.0);
const cv::Mat imgLeft = readImage(GetParam().first);
ASSERT_FALSE(imgLeft.empty());
const cv::Mat imgRight = readImage(GetParam().second);
ASSERT_FALSE(imgRight.empty());
const int ndisp = 64;
if (runOnGpu)
{
cv::gpu::StereoBeliefPropagation d_bp(ndisp);
cv::gpu::GpuMat d_imgLeft(imgLeft);
cv::gpu::GpuMat d_imgRight(imgRight);
cv::gpu::GpuMat d_dst;
d_bp(d_imgLeft, d_imgRight, d_dst);
TEST_CYCLE()
{
bp(img_l, img_r, dst);
d_bp(d_imgLeft, d_imgRight, d_dst);
}
}
else
{
FAIL();
}
}
INSTANTIATE_TEST_CASE_P(Calib3D, StereoBeliefPropagation, ALL_DEVICES);
//////////////////////////////////////////////////////////////////////
// StereoConstantSpaceBP
GPU_PERF_TEST_1(StereoConstantSpaceBP, cv::gpu::DeviceInfo)
PERF_TEST_P(ImagePair, Calib3D_StereoConstantSpaceBP, Values(make_pair<string, string>("gpu/stereobm/aloe-L.png", "gpu/stereobm/aloe-R.png")))
{
cv::gpu::DeviceInfo devInfo = GetParam();
cv::gpu::setDevice(devInfo.deviceID());
cv::Mat img_l_host = readImage("gpu/stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img_l_host.empty());
cv::Mat img_r_host = readImage("gpu/stereobm/aloe-R.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img_r_host.empty());
cv::gpu::StereoConstantSpaceBP csbp(128);
cv::gpu::GpuMat img_l(img_l_host);
cv::gpu::GpuMat img_r(img_r_host);
cv::gpu::GpuMat dst;
csbp(img_l, img_r, dst);
declare.time(10.0);
const cv::Mat imgLeft = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(imgLeft.empty());
const cv::Mat imgRight = readImage(GetParam().second, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(imgRight.empty());
const int ndisp = 128;
if (runOnGpu)
{
cv::gpu::StereoConstantSpaceBP d_csbp(ndisp);
cv::gpu::GpuMat d_imgLeft(imgLeft);
cv::gpu::GpuMat d_imgRight(imgRight);
cv::gpu::GpuMat d_dst;
d_csbp(d_imgLeft, d_imgRight, d_dst);
TEST_CYCLE()
{
csbp(img_l, img_r, dst);
d_csbp(d_imgLeft, d_imgRight, d_dst);
}
}
else
{
FAIL();
}
}
INSTANTIATE_TEST_CASE_P(Calib3D, StereoConstantSpaceBP, ALL_DEVICES);
//////////////////////////////////////////////////////////////////////
// DisparityBilateralFilter
GPU_PERF_TEST_1(DisparityBilateralFilter, cv::gpu::DeviceInfo)
PERF_TEST_P(ImagePair, Calib3D_DisparityBilateralFilter, Values(make_pair<string, string>("gpu/stereobm/aloe-L.png", "gpu/stereobm/aloe-disp.png")))
{
cv::gpu::DeviceInfo devInfo = GetParam();
cv::gpu::setDevice(devInfo.deviceID());
const cv::Mat img = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
cv::Mat img_host = readImage("gpu/stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img_host.empty());
const cv::Mat disp = readImage(GetParam().second, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(disp.empty());
cv::Mat disp_host = readImage("gpu/stereobm/aloe-disp.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(disp_host.empty());
const int ndisp = 128;
cv::gpu::DisparityBilateralFilter f(128);
cv::gpu::GpuMat img(img_host);
cv::gpu::GpuMat disp(disp_host);
cv::gpu::GpuMat dst;
if (runOnGpu)
{
cv::gpu::DisparityBilateralFilter d_filter(ndisp);
f(disp, img, dst);
cv::gpu::GpuMat d_img(img);
cv::gpu::GpuMat d_disp(disp);
cv::gpu::GpuMat d_dst;
d_filter(d_disp, d_img, d_dst);
TEST_CYCLE()
{
f(disp, img, dst);
d_filter(d_disp, d_img, d_dst);
}
}
else
{
FAIL();
}
}
INSTANTIATE_TEST_CASE_P(Calib3D, DisparityBilateralFilter, ALL_DEVICES);
//////////////////////////////////////////////////////////////////////
// TransformPoints
IMPLEMENT_PARAM_CLASS(Count, int)
DEF_PARAM_TEST_1(Count, int);
GPU_PERF_TEST(TransformPoints, cv::gpu::DeviceInfo, Count)
PERF_TEST_P(Count, Calib3D_TransformPoints, Values(5000, 10000, 20000))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
const int count = GetParam();
int count = GET_PARAM(1);
cv::Mat src(1, count, CV_32FC3);
fillRandom(src, -100, 100);
cv::Mat src_host(1, count, CV_32FC3);
fill(src_host, -100, 100);
const cv::Mat rvec = cv::Mat::ones(1, 3, CV_32FC1);
const cv::Mat tvec = cv::Mat::ones(1, 3, CV_32FC1);
cv::gpu::GpuMat src(src_host);
cv::Mat rvec = cv::Mat::ones(1, 3, CV_32FC1);
cv::Mat tvec = cv::Mat::ones(1, 3, CV_32FC1);
cv::gpu::GpuMat dst;
if (runOnGpu)
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::transformPoints(src, rvec, tvec, dst);
cv::gpu::transformPoints(d_src, rvec, tvec, d_dst);
TEST_CYCLE()
{
cv::gpu::transformPoints(src, rvec, tvec, dst);
cv::gpu::transformPoints(d_src, rvec, tvec, d_dst);
}
}
else
{
FAIL();
}
}
INSTANTIATE_TEST_CASE_P(Calib3D, TransformPoints, testing::Combine(
ALL_DEVICES,
testing::Values<Count>(5000, 10000, 20000)));
//////////////////////////////////////////////////////////////////////
// ProjectPoints
GPU_PERF_TEST(ProjectPoints, cv::gpu::DeviceInfo, Count)
PERF_TEST_P(Count, Calib3D_ProjectPoints, Values(5000, 10000, 20000))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
const int count = GetParam();
int count = GET_PARAM(1);
cv::Mat src(1, count, CV_32FC3);
fillRandom(src, -100, 100);
cv::Mat src_host(1, count, CV_32FC3);
fill(src_host, -100, 100);
const cv::Mat rvec = cv::Mat::ones(1, 3, CV_32FC1);
const cv::Mat tvec = cv::Mat::ones(1, 3, CV_32FC1);
const cv::Mat camera_mat = cv::Mat::ones(3, 3, CV_32FC1);
cv::gpu::GpuMat src(src_host);
cv::Mat rvec = cv::Mat::ones(1, 3, CV_32FC1);
cv::Mat tvec = cv::Mat::ones(1, 3, CV_32FC1);
cv::Mat camera_mat = cv::Mat::ones(3, 3, CV_32FC1);
cv::gpu::GpuMat dst;
if (runOnGpu)
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::projectPoints(src, rvec, tvec, camera_mat, cv::Mat(), dst);
cv::gpu::projectPoints(d_src, rvec, tvec, camera_mat, cv::Mat(), d_dst);
TEST_CYCLE()
{
cv::gpu::projectPoints(src, rvec, tvec, camera_mat, cv::Mat(), dst);
cv::gpu::projectPoints(d_src, rvec, tvec, camera_mat, cv::Mat(), d_dst);
}
}
else
{
cv::Mat dst;
cv::projectPoints(src, rvec, tvec, camera_mat, cv::noArray(), dst);
TEST_CYCLE()
{
cv::projectPoints(src, rvec, tvec, camera_mat, cv::noArray(), dst);
}
}
}
INSTANTIATE_TEST_CASE_P(Calib3D, ProjectPoints, testing::Combine(
ALL_DEVICES,
testing::Values<Count>(5000, 10000, 20000)));
//////////////////////////////////////////////////////////////////////
// SolvePnPRansac
GPU_PERF_TEST(SolvePnPRansac, cv::gpu::DeviceInfo, Count)
PERF_TEST_P(Count, Calib3D_SolvePnPRansac, Values(5000, 10000, 20000))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
declare.time(10.0);
int count = GET_PARAM(1);
const int count = GetParam();
cv::Mat object(1, count, CV_32FC3);
fill(object, -100, 100);
fillRandom(object, -100, 100);
cv::Mat camera_mat(3, 3, CV_32FC1);
fill(camera_mat, 0.5, 1);
fillRandom(camera_mat, 0.5, 1);
camera_mat.at<float>(0, 1) = 0.f;
camera_mat.at<float>(1, 0) = 0.f;
camera_mat.at<float>(2, 0) = 0.f;
camera_mat.at<float>(2, 1) = 0.f;
cv::Mat dist_coef(1, 8, CV_32F, cv::Scalar::all(0));
const cv::Mat dist_coef(1, 8, CV_32F, cv::Scalar::all(0));
std::vector<cv::Point2f> image_vec;
cv::Mat rvec_gold(1, 3, CV_32FC1);
fill(rvec_gold, 0, 1);
fillRandom(rvec_gold, 0, 1);
cv::Mat tvec_gold(1, 3, CV_32FC1);
fill(tvec_gold, 0, 1);
fillRandom(tvec_gold, 0, 1);
cv::projectPoints(object, rvec_gold, tvec_gold, camera_mat, dist_coef, image_vec);
cv::Mat image(1, count, CV_32FC2, &image_vec[0]);
@ -221,82 +265,92 @@ GPU_PERF_TEST(SolvePnPRansac, cv::gpu::DeviceInfo, Count)
cv::Mat rvec;
cv::Mat tvec;
if (runOnGpu)
{
cv::gpu::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
declare.time(3.0);
TEST_CYCLE()
{
cv::gpu::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
}
}
}
else
{
cv::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
INSTANTIATE_TEST_CASE_P(Calib3D, SolvePnPRansac, testing::Combine(
ALL_DEVICES,
testing::Values<Count>(5000, 10000, 20000)));
TEST_CYCLE()
{
cv::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
}
}
}
//////////////////////////////////////////////////////////////////////
// ReprojectImageTo3D
GPU_PERF_TEST(ReprojectImageTo3D, cv::gpu::DeviceInfo, cv::Size, MatDepth)
PERF_TEST_P(Sz_Depth, Calib3D_ReprojectImageTo3D, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16S)))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
const cv::Size size = GET_PARAM(0);
const int depth = GET_PARAM(1);
cv::Size size = GET_PARAM(1);
int depth = GET_PARAM(2);
cv::Mat src_host(size, depth);
fill(src_host, 5.0, 30.0);
cv::Mat src(size, depth);
fillRandom(src, 5.0, 30.0);
cv::Mat Q(4, 4, CV_32FC1);
fill(Q, 0.1, 1.0);
fillRandom(Q, 0.1, 1.0);
cv::gpu::GpuMat src(src_host);
cv::gpu::GpuMat dst;
if (runOnGpu)
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::reprojectImageTo3D(src, dst, Q);
cv::gpu::reprojectImageTo3D(d_src, d_dst, Q);
TEST_CYCLE()
{
cv::gpu::reprojectImageTo3D(src, dst, Q);
cv::gpu::reprojectImageTo3D(d_src, d_dst, Q);
}
}
else
{
cv::Mat dst;
cv::reprojectImageTo3D(src, dst, Q);
TEST_CYCLE()
{
cv::reprojectImageTo3D(src, dst, Q);
}
}
}
INSTANTIATE_TEST_CASE_P(Calib3D, ReprojectImageTo3D, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values<MatDepth>(CV_8U, CV_16S)));
//////////////////////////////////////////////////////////////////////
// DrawColorDisp
GPU_PERF_TEST(DrawColorDisp, cv::gpu::DeviceInfo, cv::Size, MatDepth)
PERF_TEST_P(Sz_Depth, Calib3D_DrawColorDisp, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16S)))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
const cv::Size size = GET_PARAM(0);
const int type = GET_PARAM(1);
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
cv::Mat src(size, type);
fillRandom(src, 0, 255);
cv::Mat src_host(size, type);
fill(src_host, 0, 255);
if (runOnGpu)
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::GpuMat src(src_host);
cv::gpu::GpuMat dst;
cv::gpu::drawColorDisp(src, dst, 255);
cv::gpu::drawColorDisp(d_src, d_dst, 255);
TEST_CYCLE()
{
cv::gpu::drawColorDisp(src, dst, 255);
cv::gpu::drawColorDisp(d_src, d_dst, 255);
}
}
else
{
FAIL();
}
}
INSTANTIATE_TEST_CASE_P(Calib3D, DrawColorDisp, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatDepth(CV_8U), MatDepth(CV_16S))));
#endif
} // namespace

File diff suppressed because it is too large

View File

@ -1,209 +1,278 @@
#include "perf_precomp.hpp"
#ifdef HAVE_CUDA
using namespace std;
using namespace testing;
namespace {
//////////////////////////////////////////////////////////////////////
// SURF
GPU_PERF_TEST_1(SURF, cv::gpu::DeviceInfo)
DEF_PARAM_TEST_1(Image, string);
PERF_TEST_P(Image, Features2D_SURF, Values<string>("gpu/perf/aloe.jpg"))
{
cv::gpu::DeviceInfo devInfo = GetParam();
cv::gpu::setDevice(devInfo.deviceID());
declare.time(50.0);
cv::Mat img_host = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img_host.empty());
cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
cv::gpu::SURF_GPU surf;
if (runOnGpu)
{
cv::gpu::SURF_GPU d_surf;
cv::gpu::GpuMat img(img_host);
cv::gpu::GpuMat keypoints, descriptors;
cv::gpu::GpuMat d_img(img);
cv::gpu::GpuMat d_keypoints, d_descriptors;
surf(img, cv::gpu::GpuMat(), keypoints, descriptors);
declare.time(2.0);
d_surf(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
TEST_CYCLE()
{
surf(img, cv::gpu::GpuMat(), keypoints, descriptors);
d_surf(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
}
}
else
{
cv::SURF surf;
std::vector<cv::KeyPoint> keypoints;
cv::Mat descriptors;
surf(img, cv::noArray(), keypoints, descriptors);
TEST_CYCLE()
{
keypoints.clear();
surf(img, cv::noArray(), keypoints, descriptors);
}
}
}
INSTANTIATE_TEST_CASE_P(Features2D, SURF, ALL_DEVICES);
//////////////////////////////////////////////////////////////////////
// FAST
GPU_PERF_TEST_1(FAST, cv::gpu::DeviceInfo)
PERF_TEST_P(Image, Features2D_FAST, Values<string>("gpu/perf/aloe.jpg"))
{
cv::gpu::DeviceInfo devInfo = GetParam();
cv::gpu::setDevice(devInfo.deviceID());
cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
cv::Mat img_host = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img_host.empty());
if (runOnGpu)
{
cv::gpu::FAST_GPU d_fast(20);
cv::gpu::FAST_GPU fast(20);
cv::gpu::GpuMat d_img(img);
cv::gpu::GpuMat d_keypoints;
cv::gpu::GpuMat img(img_host);
cv::gpu::GpuMat keypoints;
fast(img, cv::gpu::GpuMat(), keypoints);
d_fast(d_img, cv::gpu::GpuMat(), d_keypoints);
TEST_CYCLE()
{
fast(img, cv::gpu::GpuMat(), keypoints);
d_fast(d_img, cv::gpu::GpuMat(), d_keypoints);
}
}
else
{
std::vector<cv::KeyPoint> keypoints;
cv::FAST(img, keypoints, 20);
TEST_CYCLE()
{
keypoints.clear();
cv::FAST(img, keypoints, 20);
}
}
}
INSTANTIATE_TEST_CASE_P(Features2D, FAST, ALL_DEVICES);
//////////////////////////////////////////////////////////////////////
// ORB
GPU_PERF_TEST_1(ORB, cv::gpu::DeviceInfo)
PERF_TEST_P(Image, Features2D_ORB, Values<string>("gpu/perf/aloe.jpg"))
{
cv::gpu::DeviceInfo devInfo = GetParam();
cv::gpu::setDevice(devInfo.deviceID());
cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
cv::Mat img_host = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img_host.empty());
if (runOnGpu)
{
cv::gpu::ORB_GPU d_orb(4000);
cv::gpu::ORB_GPU orb(4000);
cv::gpu::GpuMat d_img(img);
cv::gpu::GpuMat d_keypoints, d_descriptors;
cv::gpu::GpuMat img(img_host);
cv::gpu::GpuMat keypoints, descriptors;
d_orb(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
TEST_CYCLE()
{
orb(img, cv::gpu::GpuMat(), keypoints, descriptors);
d_orb(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
}
}
else
{
cv::ORB orb(4000);
std::vector<cv::KeyPoint> keypoints;
cv::Mat descriptors;
orb(img, cv::noArray(), keypoints, descriptors);
TEST_CYCLE()
{
keypoints.clear();
orb(img, cv::noArray(), keypoints, descriptors);
}
}
}
INSTANTIATE_TEST_CASE_P(Features2D, ORB, ALL_DEVICES);
//////////////////////////////////////////////////////////////////////
// BFMatch
DEF_PARAM_TEST(DescSize_Norm, int, NormType);
PERF_TEST_P(DescSize_Norm, Features2D_BFMatch, Combine(Values(64, 128, 256), Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))))
{
declare.time(20.0);
int desc_size = GET_PARAM(0);
int normType = GET_PARAM(1);
int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;
cv::Mat query(3000, desc_size, type);
fillRandom(query);
cv::Mat train(3000, desc_size, type);
fillRandom(train);
if (runOnGpu)
{
cv::gpu::BFMatcher_GPU d_matcher(normType);
cv::gpu::GpuMat d_query(query);
cv::gpu::GpuMat d_train(train);
cv::gpu::GpuMat d_trainIdx, d_distance;
d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
TEST_CYCLE()
{
d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
}
}
else
{
cv::BFMatcher matcher(normType);
std::vector<cv::DMatch> matches;
matcher.match(query, train, matches);
TEST_CYCLE()
{
matcher.match(query, train, matches);
}
}
}
//////////////////////////////////////////////////////////////////////
// BruteForceMatcher_match
// BFKnnMatch
IMPLEMENT_PARAM_CLASS(DescriptorSize, int)
DEF_PARAM_TEST(DescSize_K_Norm, int, int, NormType);
GPU_PERF_TEST(BruteForceMatcher_match, cv::gpu::DeviceInfo, DescriptorSize, NormType)
PERF_TEST_P(DescSize_K_Norm, Features2D_BFKnnMatch, Combine(
Values(64, 128, 256),
Values(2, 3),
Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
declare.time(30.0);
int desc_size = GET_PARAM(1);
int desc_size = GET_PARAM(0);
int k = GET_PARAM(1);
int normType = GET_PARAM(2);
int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;
cv::Mat query_host(3000, desc_size, type);
fill(query_host, 0.0, 10.0);
cv::Mat query(3000, desc_size, type);
fillRandom(query);
cv::Mat train_host(3000, desc_size, type);
fill(train_host, 0.0, 10.0);
cv::Mat train(3000, desc_size, type);
fillRandom(train);
cv::gpu::BFMatcher_GPU matcher(normType);
if (runOnGpu)
{
cv::gpu::BFMatcher_GPU d_matcher(normType);
cv::gpu::GpuMat query(query_host);
cv::gpu::GpuMat train(train_host);
cv::gpu::GpuMat trainIdx, distance;
cv::gpu::GpuMat d_query(query);
cv::gpu::GpuMat d_train(train);
cv::gpu::GpuMat d_trainIdx, d_distance, d_allDist;
matcher.matchSingle(query, train, trainIdx, distance);
declare.time(3.0);
d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, k);
TEST_CYCLE()
{
matcher.matchSingle(query, train, trainIdx, distance);
d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, k);
}
}
else
{
cv::BFMatcher matcher(normType);
std::vector< std::vector<cv::DMatch> > matches;
matcher.knnMatch(query, train, matches, k);
TEST_CYCLE()
{
matcher.knnMatch(query, train, matches, k);
}
}
}
INSTANTIATE_TEST_CASE_P(Features2D, BruteForceMatcher_match, testing::Combine(
ALL_DEVICES,
testing::Values(DescriptorSize(64), DescriptorSize(128), DescriptorSize(256)),
testing::Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))));
//////////////////////////////////////////////////////////////////////
// BruteForceMatcher_knnMatch
// BFRadiusMatch
IMPLEMENT_PARAM_CLASS(K, int)
GPU_PERF_TEST(BruteForceMatcher_knnMatch, cv::gpu::DeviceInfo, DescriptorSize, K, NormType)
PERF_TEST_P(DescSize_Norm, Features2D_BFRadiusMatch, Combine(Values(64, 128, 256), Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
declare.time(30.0);
int desc_size = GET_PARAM(1);
int k = GET_PARAM(2);
int normType = GET_PARAM(3);
int desc_size = GET_PARAM(0);
int normType = GET_PARAM(1);
int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;
cv::Mat query_host(3000, desc_size, type);
fill(query_host, 0.0, 10.0);
cv::Mat query(3000, desc_size, type);
fillRandom(query, 0.0, 1.0);
cv::Mat train_host(3000, desc_size, type);
fill(train_host, 0.0, 10.0);
cv::Mat train(3000, desc_size, type);
fillRandom(train, 0.0, 1.0);
cv::gpu::BFMatcher_GPU matcher(normType);
if (runOnGpu)
{
cv::gpu::BFMatcher_GPU d_matcher(normType);
cv::gpu::GpuMat query(query_host);
cv::gpu::GpuMat train(train_host);
cv::gpu::GpuMat trainIdx, distance, allDist;
cv::gpu::GpuMat d_query(query);
cv::gpu::GpuMat d_train(train);
cv::gpu::GpuMat d_trainIdx, d_nMatches, d_distance;
matcher.knnMatchSingle(query, train, trainIdx, distance, allDist, k);
declare.time(3.0);
d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, 2.0);
TEST_CYCLE()
{
matcher.knnMatchSingle(query, train, trainIdx, distance, allDist, k);
d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, 2.0);
}
}
}
else
{
cv::BFMatcher matcher(normType);
INSTANTIATE_TEST_CASE_P(Features2D, BruteForceMatcher_knnMatch, testing::Combine(
ALL_DEVICES,
testing::Values(DescriptorSize(64), DescriptorSize(128), DescriptorSize(256)),
testing::Values(K(2), K(3)),
testing::Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))));
std::vector< std::vector<cv::DMatch> > matches;
//////////////////////////////////////////////////////////////////////
// BruteForceMatcher_radiusMatch
GPU_PERF_TEST(BruteForceMatcher_radiusMatch, cv::gpu::DeviceInfo, DescriptorSize, NormType)
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
int desc_size = GET_PARAM(1);
int normType = GET_PARAM(2);
int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;
cv::Mat query_host(3000, desc_size, type);
fill(query_host, 0.0, 1.0);
cv::Mat train_host(3000, desc_size, type);
fill(train_host, 0.0, 1.0);
cv::gpu::BFMatcher_GPU matcher(normType);
cv::gpu::GpuMat query(query_host);
cv::gpu::GpuMat train(train_host);
cv::gpu::GpuMat trainIdx, nMatches, distance;
matcher.radiusMatchSingle(query, train, trainIdx, distance, nMatches, 2.0);
declare.time(3.0);
matcher.radiusMatch(query, train, matches, 2.0);
TEST_CYCLE()
{
matcher.radiusMatchSingle(query, train, trainIdx, distance, nMatches, 2.0);
matcher.radiusMatch(query, train, matches, 2.0);
}
}
}
INSTANTIATE_TEST_CASE_P(Features2D, BruteForceMatcher_radiusMatch, testing::Combine(
ALL_DEVICES,
testing::Values(DescriptorSize(64), DescriptorSize(128), DescriptorSize(256)),
testing::Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))));
#endif
} // namespace

View File

@ -1,308 +1,379 @@
#include "perf_precomp.hpp"
#ifdef HAVE_CUDA
using namespace std;
using namespace testing;
namespace {
//////////////////////////////////////////////////////////////////////
// Blur
IMPLEMENT_PARAM_CLASS(KernelSize, int)
DEF_PARAM_TEST(Sz_Type_KernelSz, cv::Size, MatType, int);
GPU_PERF_TEST(Blur, cv::gpu::DeviceInfo, cv::Size, MatType, KernelSize)
PERF_TEST_P(Sz_Type_KernelSz, Filters_Blur, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4), Values(3, 5, 7)))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
declare.time(20.0);
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
int ksize = GET_PARAM(3);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
int ksize = GET_PARAM(2);
cv::Mat src_host(size, type);
fill(src_host, 0.0, 255.0);
cv::Mat src(size, type);
fillRandom(src);
cv::gpu::GpuMat src(src_host);
cv::gpu::GpuMat dst;
if (runOnGpu)
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::blur(src, dst, cv::Size(ksize, ksize));
cv::gpu::blur(d_src, d_dst, cv::Size(ksize, ksize));
TEST_CYCLE()
{
cv::gpu::blur(src, dst, cv::Size(ksize, ksize));
cv::gpu::blur(d_src, d_dst, cv::Size(ksize, ksize));
}
}
else
{
cv::Mat dst;
cv::blur(src, dst, cv::Size(ksize, ksize));
TEST_CYCLE()
{
cv::blur(src, dst, cv::Size(ksize, ksize));
}
}
}
INSTANTIATE_TEST_CASE_P(Filters, Blur, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)),
testing::Values(KernelSize(3), KernelSize(5), KernelSize(7))));
//////////////////////////////////////////////////////////////////////
// Sobel
GPU_PERF_TEST(Sobel, cv::gpu::DeviceInfo, cv::Size, MatType, KernelSize)
PERF_TEST_P(Sz_Type_KernelSz, Filters_Sobel, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1), Values(3, 5, 7, 9, 11, 13, 15)))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
declare.time(20.0);
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
int ksize = GET_PARAM(3);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
int ksize = GET_PARAM(2);
cv::Mat src_host(size, type);
fill(src_host, 0.0, 255.0);
cv::Mat src(size, type);
fillRandom(src);
cv::gpu::GpuMat src(src_host);
cv::gpu::GpuMat dst;
cv::gpu::GpuMat buf;
if (runOnGpu)
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::GpuMat d_buf;
cv::gpu::Sobel(src, dst, -1, 1, 1, buf, ksize);
cv::gpu::Sobel(d_src, d_dst, -1, 1, 1, d_buf, ksize);
TEST_CYCLE()
{
cv::gpu::Sobel(src, dst, -1, 1, 1, buf, ksize);
cv::gpu::Sobel(d_src, d_dst, -1, 1, 1, d_buf, ksize);
}
}
else
{
cv::Mat dst;
cv::Sobel(src, dst, -1, 1, 1, ksize);
TEST_CYCLE()
{
cv::Sobel(src, dst, -1, 1, 1, ksize);
}
}
}
INSTANTIATE_TEST_CASE_P(Filters, Sobel, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1)),
testing::Values(KernelSize(3), KernelSize(5), KernelSize(7), KernelSize(9), KernelSize(11), KernelSize(13), KernelSize(15))));
//////////////////////////////////////////////////////////////////////
// Scharr
GPU_PERF_TEST(Scharr, cv::gpu::DeviceInfo, cv::Size, MatType)
PERF_TEST_P(Sz_Type, Filters_Scharr, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1)))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
declare.time(20.0);
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
cv::Mat src_host(size, type);
fill(src_host, 0.0, 255.0);
cv::Mat src(size, type);
fillRandom(src);
cv::gpu::GpuMat src(src_host);
cv::gpu::GpuMat dst;
cv::gpu::GpuMat buf;
if (runOnGpu)
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::GpuMat d_buf;
cv::gpu::Scharr(src, dst, -1, 1, 0, buf);
cv::gpu::Scharr(d_src, d_dst, -1, 1, 0, d_buf);
TEST_CYCLE()
{
cv::gpu::Scharr(src, dst, -1, 1, 0, buf);
cv::gpu::Scharr(d_src, d_dst, -1, 1, 0, d_buf);
}
}
else
{
cv::Mat dst;
cv::Scharr(src, dst, -1, 1, 0);
TEST_CYCLE()
{
cv::Scharr(src, dst, -1, 1, 0);
}
}
}
INSTANTIATE_TEST_CASE_P(Filters, Scharr, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1))));
//////////////////////////////////////////////////////////////////////
// GaussianBlur
GPU_PERF_TEST(GaussianBlur, cv::gpu::DeviceInfo, cv::Size, MatType, KernelSize)
PERF_TEST_P(Sz_Type_KernelSz, Filters_GaussianBlur, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1), Values(3, 5, 7, 9, 11, 13, 15)))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
declare.time(20.0);
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
int ksize = GET_PARAM(3);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
int ksize = GET_PARAM(2);
cv::Mat src_host(size, type);
fill(src_host, 0.0, 255.0);
cv::Mat src(size, type);
fillRandom(src);
cv::gpu::GpuMat src(src_host);
cv::gpu::GpuMat dst;
cv::gpu::GpuMat buf;
if (runOnGpu)
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::GpuMat d_buf;
cv::gpu::GaussianBlur(src, dst, cv::Size(ksize, ksize), buf, 0.5);
cv::gpu::GaussianBlur(d_src, d_dst, cv::Size(ksize, ksize), d_buf, 0.5);
TEST_CYCLE()
{
cv::gpu::GaussianBlur(src, dst, cv::Size(ksize, ksize), buf, 0.5);
cv::gpu::GaussianBlur(d_src, d_dst, cv::Size(ksize, ksize), d_buf, 0.5);
}
}
else
{
cv::Mat dst;
cv::GaussianBlur(src, dst, cv::Size(ksize, ksize), 0.5);
TEST_CYCLE()
{
cv::GaussianBlur(src, dst, cv::Size(ksize, ksize), 0.5);
}
}
}
INSTANTIATE_TEST_CASE_P(Filters, GaussianBlur, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1)),
testing::Values(KernelSize(3), KernelSize(5), KernelSize(7), KernelSize(9), KernelSize(11), KernelSize(13), KernelSize(15))));
//////////////////////////////////////////////////////////////////////
// Laplacian
GPU_PERF_TEST(Laplacian, cv::gpu::DeviceInfo, cv::Size, MatType, KernelSize)
PERF_TEST_P(Sz_Type_KernelSz, Filters_Laplacian, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 3)))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
declare.time(20.0);
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
int ksize = GET_PARAM(3);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
int ksize = GET_PARAM(2);
cv::Mat src_host(size, type);
fill(src_host, 0.0, 255.0);
cv::Mat src(size, type);
fillRandom(src);
cv::gpu::GpuMat src(src_host);
cv::gpu::GpuMat dst;
if (runOnGpu)
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::Laplacian(src, dst, -1, ksize);
cv::gpu::Laplacian(d_src, d_dst, -1, ksize);
TEST_CYCLE()
{
cv::gpu::Laplacian(src, dst, -1, ksize);
cv::gpu::Laplacian(d_src, d_dst, -1, ksize);
}
}
else
{
cv::Mat dst;
cv::Laplacian(src, dst, -1, ksize);
TEST_CYCLE()
{
cv::Laplacian(src, dst, -1, ksize);
}
}
}
INSTANTIATE_TEST_CASE_P(Filters, Laplacian, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC4)),
testing::Values(KernelSize(1), KernelSize(3))));
//////////////////////////////////////////////////////////////////////
// Erode
GPU_PERF_TEST(Erode, cv::gpu::DeviceInfo, cv::Size, MatType)
PERF_TEST_P(Sz_Type, Filters_Erode, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4)))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
declare.time(20.0);
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
cv::Mat src_host(size, type);
fill(src_host, 0.0, 255.0);
cv::Mat src(size, type);
fillRandom(src);
cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
cv::gpu::GpuMat src(src_host);
cv::gpu::GpuMat dst;
cv::gpu::GpuMat buf;
if (runOnGpu)
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::GpuMat d_buf;
cv::gpu::erode(src, dst, ker, buf);
cv::gpu::erode(d_src, d_dst, ker, d_buf);
TEST_CYCLE()
{
cv::gpu::erode(src, dst, ker, buf);
cv::gpu::erode(d_src, d_dst, ker, d_buf);
}
}
else
{
cv::Mat dst;
cv::erode(src, dst, ker);
TEST_CYCLE()
{
cv::erode(src, dst, ker);
}
}
}
INSTANTIATE_TEST_CASE_P(Filters, Erode, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4))));
//////////////////////////////////////////////////////////////////////
// Dilate
GPU_PERF_TEST(Dilate, cv::gpu::DeviceInfo, cv::Size, MatType)
PERF_TEST_P(Sz_Type, Filters_Dilate, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4)))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
declare.time(20.0);
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
cv::Mat src_host(size, type);
fill(src_host, 0.0, 255.0);
cv::Mat src(size, type);
fillRandom(src);
cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
cv::gpu::GpuMat src(src_host);
cv::gpu::GpuMat dst;
cv::gpu::GpuMat buf;
if (runOnGpu)
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::GpuMat d_buf;
cv::gpu::dilate(src, dst, ker, buf);
cv::gpu::dilate(d_src, d_dst, ker, d_buf);
TEST_CYCLE()
{
cv::gpu::dilate(src, dst, ker, buf);
cv::gpu::dilate(d_src, d_dst, ker, d_buf);
}
}
else
{
cv::Mat dst;
cv::dilate(src, dst, ker);
TEST_CYCLE()
{
cv::dilate(src, dst, ker);
}
}
}
INSTANTIATE_TEST_CASE_P(Filters, Dilate, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4))));
//////////////////////////////////////////////////////////////////////
// MorphologyEx
CV_ENUM(MorphOp, cv::MORPH_OPEN, cv::MORPH_CLOSE, cv::MORPH_GRADIENT, cv::MORPH_TOPHAT, cv::MORPH_BLACKHAT)
#define ALL_MORPH_OPS testing::Values(MorphOp(cv::MORPH_OPEN), MorphOp(cv::MORPH_CLOSE), MorphOp(cv::MORPH_GRADIENT), MorphOp(cv::MORPH_TOPHAT), MorphOp(cv::MORPH_BLACKHAT))
#define ALL_MORPH_OPS ValuesIn(MorphOp::all())
GPU_PERF_TEST(MorphologyEx, cv::gpu::DeviceInfo, cv::Size, MatType, MorphOp)
DEF_PARAM_TEST(Sz_Type_Op, cv::Size, MatType, MorphOp);
PERF_TEST_P(Sz_Type_Op, Filters_MorphologyEx, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4), ALL_MORPH_OPS))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
declare.time(20.0);
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
int morphOp = GET_PARAM(3);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
int morphOp = GET_PARAM(2);
cv::Mat src_host(size, type);
fill(src_host, 0.0, 255.0);
cv::Mat src(size, type);
fillRandom(src);
cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
cv::gpu::GpuMat src(src_host);
cv::gpu::GpuMat dst;
cv::gpu::GpuMat buf1;
cv::gpu::GpuMat buf2;
if (runOnGpu)
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::GpuMat d_buf1;
cv::gpu::GpuMat d_buf2;
cv::gpu::morphologyEx(src, dst, morphOp, ker, buf1, buf2);
cv::gpu::morphologyEx(d_src, d_dst, morphOp, ker, d_buf1, d_buf2);
TEST_CYCLE()
{
cv::gpu::morphologyEx(src, dst, morphOp, ker, buf1, buf2);
cv::gpu::morphologyEx(d_src, d_dst, morphOp, ker, d_buf1, d_buf2);
}
}
else
{
cv::Mat dst;
cv::morphologyEx(src, dst, morphOp, ker);
TEST_CYCLE()
{
cv::morphologyEx(src, dst, morphOp, ker);
}
}
}
INSTANTIATE_TEST_CASE_P(Filters, MorphologyEx, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)),
ALL_MORPH_OPS));
//////////////////////////////////////////////////////////////////////
// Filter2D
GPU_PERF_TEST(Filter2D, cv::gpu::DeviceInfo, cv::Size, MatType, KernelSize)
PERF_TEST_P(Sz_Type_KernelSz, Filters_Filter2D, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(3, 5, 7, 9, 11, 13, 15)))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
declare.time(20.0);
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
int ksize = GET_PARAM(3);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
int ksize = GET_PARAM(2);
cv::Mat src_host(size, type);
fill(src_host, 0.0, 255.0);
cv::Mat src(size, type);
fillRandom(src);
cv::Mat kernel(ksize, ksize, CV_32FC1);
fill(kernel, 0.0, 1.0);
fillRandom(kernel, 0.0, 1.0);
cv::gpu::GpuMat src(src_host);
cv::gpu::GpuMat dst;
if (runOnGpu)
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::filter2D(src, dst, -1, kernel);
cv::gpu::filter2D(d_src, d_dst, -1, kernel);
TEST_CYCLE()
{
cv::gpu::filter2D(src, dst, -1, kernel);
cv::gpu::filter2D(d_src, d_dst, -1, kernel);
}
}
else
{
cv::Mat dst;
cv::filter2D(src, dst, -1, kernel);
TEST_CYCLE()
{
cv::filter2D(src, dst, -1, kernel);
}
}
}
INSTANTIATE_TEST_CASE_P(Filters, Filter2D, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC4)),
testing::Values(KernelSize(3), KernelSize(5), KernelSize(7), KernelSize(9), KernelSize(11), KernelSize(13), KernelSize(15))));
#endif
} // namespace

File diff suppressed because it is too large

View File

@ -1,57 +1,113 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//M*/
#include "perf_precomp.hpp"
#ifdef HAVE_CUDA
using namespace std;
using namespace testing;
GPU_PERF_TEST(ConnectedComponents, cv::gpu::DeviceInfo, cv::Size)
namespace {
DEF_PARAM_TEST_1(Image, string);
struct GreedyLabeling
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
struct dot
{
int x;
int y;
cv::Mat image = readImage("gpu/labeling/aloe-disp.png", cv::IMREAD_GRAYSCALE);
static dot make(int i, int j)
{
dot d; d.x = i; d.y = j;
return d;
}
};
// cv::threshold(image, image, 150, 255, CV_THRESH_BINARY);
struct InInterval
{
InInterval(const int& _lo, const int& _hi) : lo(-_lo), hi(_hi) {};
const int lo, hi;
bool operator() (const unsigned char a, const unsigned char b) const
{
int d = a - b;
return lo <= d && d <= hi;
}
private:
InInterval& operator=(const InInterval&);
};
GreedyLabeling(cv::Mat img)
: image(img), _labels(image.size(), CV_32SC1, cv::Scalar::all(-1)) {stack = new dot[image.cols * image.rows];}
~GreedyLabeling(){delete[] stack;}
void operator() (cv::Mat labels) const
{
labels.setTo(cv::Scalar::all(-1));
InInterval inInt(0, 2);
int cc = -1;
int* dist_labels = (int*)labels.data;
int pitch = static_cast<int>(labels.step1());
unsigned char* source = (unsigned char*)image.data;
int width = image.cols;
int height = image.rows;
for (int j = 0; j < image.rows; ++j)
for (int i = 0; i < image.cols; ++i)
{
if (dist_labels[j * pitch + i] != -1) continue;
dot* top = stack;
dot p = dot::make(i, j);
cc++;
dist_labels[j * pitch + i] = cc;
while (top >= stack)
{
int* dl = &dist_labels[p.y * pitch + p.x];
unsigned char* sp = &source[p.y * image.step1() + p.x];
dl[0] = cc;
//right
if( p.x < (width - 1) && dl[ +1] == -1 && inInt(sp[0], sp[+1]))
*top++ = dot::make(p.x + 1, p.y);
//left
if( p.x > 0 && dl[-1] == -1 && inInt(sp[0], sp[-1]))
*top++ = dot::make(p.x - 1, p.y);
//bottom
if( p.y < (height - 1) && dl[+pitch] == -1 && inInt(sp[0], sp[+image.step1()]))
*top++ = dot::make(p.x, p.y + 1);
//top
if( p.y > 0 && dl[-pitch] == -1 && inInt(sp[0], sp[-static_cast<int>(image.step1())]))
*top++ = dot::make(p.x, p.y - 1);
p = *--top;
}
}
}
cv::Mat image;
cv::Mat _labels;
dot* stack;
};
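// GreedyLabeling above is the CPU reference for the benchmark below: a stack-based flood fill that assigns one
// label per 4-connected region, merging neighbouring pixels whose intensity difference stays inside the
// [lo, hi] window checked by InInterval.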
PERF_TEST_P(Image, Labeling_ConnectedComponents, Values<string>("gpu/labeling/aloe-disp.png"))
{
declare.time(1.0);
cv::Mat image = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
if (runOnGpu)
{
cv::gpu::GpuMat mask;
mask.create(image.rows, image.cols, CV_8UC1);
@ -62,14 +118,24 @@ GPU_PERF_TEST(ConnectedComponents, cv::gpu::DeviceInfo, cv::Size)
ASSERT_NO_THROW(cv::gpu::labelComponents(mask, components));
declare.time(1.0);
TEST_CYCLE()
{
cv::gpu::labelComponents(mask, components);
}
}
else
{
GreedyLabeling host(image);
host(host._labels);
declare.time(1.0);
TEST_CYCLE()
{
host(host._labels);
}
}
}
INSTANTIATE_TEST_CASE_P(Labeling, ConnectedComponents, testing::Combine(ALL_DEVICES, testing::Values(cv::Size(261, 262))));
#endif
} // namespace

View File

@ -1,20 +0,0 @@
#include "perf_precomp.hpp"
#ifdef HAVE_CUDA
int main(int argc, char **argv)
{
testing::InitGoogleTest(&argc, argv);
perf::TestBase::Init(argc, argv);
return RUN_ALL_TESTS();
}
#else
int main()
{
printf("OpenCV was built without CUDA support\n");
return 0;
}
#endif

View File

@ -1,94 +1,122 @@
#include "perf_precomp.hpp"
#ifdef HAVE_CUDA
using namespace std;
using namespace testing;
namespace {
//////////////////////////////////////////////////////////////////////
// SetTo
GPU_PERF_TEST(SetTo, cv::gpu::DeviceInfo, cv::Size, MatType)
PERF_TEST_P(Sz_Depth_Cn, MatOp_SetTo, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), Values(1, 3, 4)))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
cv::Size size = GET_PARAM(0);
int depth = GET_PARAM(1);
int channels = GET_PARAM(2);
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
int type = CV_MAKE_TYPE(depth, channels);
cv::gpu::GpuMat src(size, type);
cv::Scalar val(1, 2, 3, 4);
if (runOnGpu)
{
cv::gpu::GpuMat d_src(size, type);
d_src.setTo(val);
TEST_CYCLE()
{
d_src.setTo(val);
}
}
else
{
cv::Mat src(size, type);
src.setTo(val);
TEST_CYCLE()
{
src.setTo(val);
}
}
}
INSTANTIATE_TEST_CASE_P(MatOp, SetTo, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4),
MatType(CV_64FC1), MatType(CV_64FC3), MatType(CV_64FC4))));
//////////////////////////////////////////////////////////////////////
// SetToMasked
GPU_PERF_TEST(SetToMasked, cv::gpu::DeviceInfo, cv::Size, MatType)
PERF_TEST_P(Sz_Depth_Cn, MatOp_SetToMasked, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), Values(1, 3, 4)))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
cv::Size size = GET_PARAM(0);
int depth = GET_PARAM(1);
int channels = GET_PARAM(2);
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
int type = CV_MAKE_TYPE(depth, channels);
cv::Mat src_host(size, type);
fill(src_host, 0, 255);
cv::Mat src(size, type);
fillRandom(src);
cv::Mat mask_host(size, CV_8UC1);
fill(mask_host, 0, 2);
cv::Mat mask(size, CV_8UC1);
fillRandom(mask, 0, 2);
cv::gpu::GpuMat src(src_host);
cv::Scalar val(1, 2, 3, 4);
cv::gpu::GpuMat mask(mask_host);
if (runOnGpu)
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_mask(mask);
d_src.setTo(val, d_mask);
TEST_CYCLE()
{
d_src.setTo(val, d_mask);
}
}
else
{
src.setTo(val, mask);
TEST_CYCLE()
{
src.setTo(val, mask);
}
}
}
INSTANTIATE_TEST_CASE_P(MatOp, SetToMasked, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4),
MatType(CV_64FC1), MatType(CV_64FC3), MatType(CV_64FC4))));
//////////////////////////////////////////////////////////////////////
// CopyToMasked
GPU_PERF_TEST(CopyToMasked, cv::gpu::DeviceInfo, cv::Size, MatType)
PERF_TEST_P(Sz_Depth_Cn, MatOp_CopyToMasked, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), Values(1, 3, 4)))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
cv::Size size = GET_PARAM(0);
int depth = GET_PARAM(1);
int channels = GET_PARAM(2);
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
int type = CV_MAKE_TYPE(depth, channels);
cv::Mat src_host(size, type);
fill(src_host, 0, 255);
cv::Mat src(size, type);
fillRandom(src);
cv::Mat mask_host(size, CV_8UC1);
fill(mask_host, 0, 2);
cv::Mat mask(size, CV_8UC1);
fillRandom(mask, 0, 2);
cv::gpu::GpuMat src(src_host);
cv::gpu::GpuMat mask(mask_host);
cv::gpu::GpuMat dst;
if (runOnGpu)
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_mask(mask);
cv::gpu::GpuMat d_dst;
d_src.copyTo(d_dst, d_mask);
TEST_CYCLE()
{
d_src.copyTo(d_dst, d_mask);
}
}
else
{
cv::Mat dst;
src.copyTo(dst, mask);
@ -96,33 +124,38 @@ GPU_PERF_TEST(CopyToMasked, cv::gpu::DeviceInfo, cv::Size, MatType)
{
src.copyTo(dst, mask);
}
}
}
INSTANTIATE_TEST_CASE_P(MatOp, CopyToMasked, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4),
MatType(CV_64FC1), MatType(CV_64FC3), MatType(CV_64FC4))));
//////////////////////////////////////////////////////////////////////
// ConvertTo
GPU_PERF_TEST(ConvertTo, cv::gpu::DeviceInfo, cv::Size, MatDepth, MatDepth)
DEF_PARAM_TEST(Sz_2Depth, cv::Size, MatDepth, MatDepth);
PERF_TEST_P(Sz_2Depth, MatOp_ConvertTo, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), Values(CV_8U, CV_16U, CV_32F, CV_64F)))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
cv::Size size = GET_PARAM(0);
int depth1 = GET_PARAM(1);
int depth2 = GET_PARAM(2);
cv::Size size = GET_PARAM(1);
int depth1 = GET_PARAM(2);
int depth2 = GET_PARAM(3);
cv::Mat src(size, depth1);
fillRandom(src);
cv::Mat src_host(size, depth1);
fill(src_host, 0, 255);
if (runOnGpu)
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::GpuMat src(src_host);
cv::gpu::GpuMat dst;
d_src.convertTo(d_dst, depth2, 0.5, 1.0);
TEST_CYCLE()
{
d_src.convertTo(d_dst, depth2, 0.5, 1.0);
}
}
else
{
cv::Mat dst;
src.convertTo(dst, depth2, 0.5, 1.0);
@ -130,12 +163,7 @@ GPU_PERF_TEST(ConvertTo, cv::gpu::DeviceInfo, cv::Size, MatDepth, MatDepth)
{
src.convertTo(dst, depth2, 0.5, 1.0);
}
}
}
INSTANTIATE_TEST_CASE_P(MatOp, ConvertTo, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F), MatDepth(CV_64F)),
testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F), MatDepth(CV_64F))));
#endif
} // namespace

View File

@ -1,22 +1,39 @@
#include "perf_precomp.hpp"
#ifdef HAVE_CUDA
using namespace std;
using namespace testing;
namespace {
///////////////////////////////////////////////////////////////
// HOG
GPU_PERF_TEST_1(HOG, cv::gpu::DeviceInfo)
DEF_PARAM_TEST_1(Image, string);
PERF_TEST_P(Image, ObjDetect_HOG, Values<string>("gpu/hog/road.png"))
{
cv::gpu::DeviceInfo devInfo = GetParam();
cv::gpu::setDevice(devInfo.deviceID());
cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
cv::Mat img_host = readImage("gpu/hog/road.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img_host.empty());
cv::gpu::GpuMat img(img_host);
std::vector<cv::Rect> found_locations;
cv::gpu::HOGDescriptor hog;
if (runOnGpu)
{
cv::gpu::GpuMat d_img(img);
cv::gpu::HOGDescriptor d_hog;
d_hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
d_hog.detectMultiScale(d_img, found_locations);
TEST_CYCLE()
{
d_hog.detectMultiScale(d_img, found_locations);
}
}
else
{
cv::HOGDescriptor hog;
hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
hog.detectMultiScale(img, found_locations);
@ -25,61 +42,90 @@ GPU_PERF_TEST_1(HOG, cv::gpu::DeviceInfo)
{
hog.detectMultiScale(img, found_locations);
}
}
}
INSTANTIATE_TEST_CASE_P(ObjDetect, HOG, ALL_DEVICES);
///////////////////////////////////////////////////////////////
// HaarClassifier
GPU_PERF_TEST_1(HaarClassifier, cv::gpu::DeviceInfo)
typedef pair<string, string> pair_string;
DEF_PARAM_TEST_1(ImageAndCascade, pair_string);
PERF_TEST_P(ImageAndCascade, ObjDetect_HaarClassifier,
Values<pair_string>(make_pair("gpu/haarcascade/group_1_640x480_VGA.pgm", "gpu/perf/haarcascade_frontalface_alt.xml")))
{
cv::gpu::DeviceInfo devInfo = GetParam();
cv::gpu::setDevice(devInfo.deviceID());
cv::Mat img = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
cv::Mat img_host = readImage("gpu/haarcascade/group_1_640x480_VGA.pgm", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img_host.empty());
if (runOnGpu)
{
cv::gpu::CascadeClassifier_GPU d_cascade;
ASSERT_TRUE(d_cascade.load(perf::TestBase::getDataPath(GetParam().second)));
cv::gpu::CascadeClassifier_GPU cascade;
cv::gpu::GpuMat d_img(img);
cv::gpu::GpuMat d_objects_buffer;
d_cascade.detectMultiScale(d_img, d_objects_buffer);
TEST_CYCLE()
{
d_cascade.detectMultiScale(d_img, d_objects_buffer);
}
}
else
{
cv::CascadeClassifier cascade;
ASSERT_TRUE(cascade.load(perf::TestBase::getDataPath("gpu/perf/haarcascade_frontalface_alt.xml")));
cv::gpu::GpuMat img(img_host);
cv::gpu::GpuMat objects_buffer;
std::vector<cv::Rect> rects;
cascade.detectMultiScale(img, objects_buffer);
cascade.detectMultiScale(img, rects);
TEST_CYCLE()
{
cascade.detectMultiScale(img, objects_buffer);
cascade.detectMultiScale(img, rects);
}
}
}
INSTANTIATE_TEST_CASE_P(ObjDetect, HaarClassifier, ALL_DEVICES);
///////////////////////////////////////////////////////////////
// LBP cascade
//===================== LBP cascade ==========================//
GPU_PERF_TEST_1(LBPClassifier, cv::gpu::DeviceInfo)
PERF_TEST_P(ImageAndCascade, ObjDetect_LBPClassifier,
Values<pair_string>(make_pair("gpu/haarcascade/group_1_640x480_VGA.pgm", "gpu/lbpcascade/lbpcascade_frontalface.xml")))
{
cv::gpu::DeviceInfo devInfo = GetParam();
cv::gpu::setDevice(devInfo.deviceID());
cv::Mat img = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
cv::Mat img_host = readImage("gpu/haarcascade/group_1_640x480_VGA.pgm", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img_host.empty());
if (runOnGpu)
{
cv::gpu::CascadeClassifier_GPU d_cascade;
ASSERT_TRUE(d_cascade.load(perf::TestBase::getDataPath(GetParam().second)));
cv::gpu::GpuMat d_img(img);
cv::gpu::GpuMat d_gpu_rects;
d_cascade.detectMultiScale(d_img, d_gpu_rects);
cv::gpu::GpuMat img(img_host);
cv::gpu::GpuMat gpu_rects;
cv::gpu::CascadeClassifier_GPU cascade;
TEST_CYCLE()
{
d_cascade.detectMultiScale(d_img, d_gpu_rects);
}
}
else
{
cv::CascadeClassifier cascade;
ASSERT_TRUE(cascade.load(perf::TestBase::getDataPath("gpu/lbpcascade/lbpcascade_frontalface.xml")));
cascade.detectMultiScale(img, gpu_rects);
std::vector<cv::Rect> rects;
cascade.detectMultiScale(img, rects);
TEST_CYCLE()
{
cascade.detectMultiScale(img, gpu_rects);
cascade.detectMultiScale(img, rects);
}
}
}
INSTANTIATE_TEST_CASE_P(ObjDetect, LBPClassifier, ALL_DEVICES);
#endif
} // namespace

View File

@ -11,6 +11,10 @@
#include "cvconfig.h"
#ifdef HAVE_CUDA
#include <cuda_runtime.h>
#endif
#include "opencv2/ts/ts.hpp"
#include "opencv2/ts/ts_perf.hpp"
@ -18,8 +22,12 @@
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/gpu/gpu.hpp"
#include "opencv2/calib3d/calib3d.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/video/video.hpp"
#include "opencv2/nonfree/nonfree.hpp"
#include "opencv2/legacy/legacy.hpp"
#include "perf_utility.hpp"
#include "utility.hpp"
#ifdef GTEST_CREATE_SHARED_LIBRARY
#error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined

View File

@ -1,77 +0,0 @@
#ifndef __OPENCV_PERF_GPU_UTILITY_HPP__
#define __OPENCV_PERF_GPU_UTILITY_HPP__
void fill(cv::Mat& m, double a, double b);
using perf::MatType;
using perf::MatDepth;
CV_ENUM(BorderMode, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT, cv::BORDER_REFLECT, cv::BORDER_WRAP)
CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::INTER_AREA)
CV_ENUM(NormType, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_HAMMING)
struct CvtColorInfo
{
int scn;
int dcn;
int code;
explicit CvtColorInfo(int scn_=0, int dcn_=0, int code_=0) : scn(scn_), dcn(dcn_), code(code_) {}
};
void PrintTo(const CvtColorInfo& info, std::ostream* os);
#define IMPLEMENT_PARAM_CLASS(name, type) \
class name \
{ \
public: \
name ( type arg = type ()) : val_(arg) {} \
operator type () const {return val_;} \
private: \
type val_; \
}; \
inline void PrintTo( name param, std::ostream* os) \
{ \
*os << #name << " = " << testing::PrintToString(static_cast< type >(param)); \
}
IMPLEMENT_PARAM_CLASS(Channels, int)
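// IMPLEMENT_PARAM_CLASS wraps a plain value in a small named type purely so that Google Test parameter listings
// read e.g. "Channels = 3" instead of an anonymous int (see the PrintTo overload generated by the macro above).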
namespace cv { namespace gpu
{
void PrintTo(const cv::gpu::DeviceInfo& info, std::ostream* os);
}}
#define GPU_PERF_TEST(name, ...) \
struct name : perf::TestBaseWithParam< std::tr1::tuple< __VA_ARGS__ > > \
{ \
public: \
name() {} \
protected: \
void PerfTestBody(); \
}; \
TEST_P(name, perf){ RunPerfTestBody(); } \
void name :: PerfTestBody()
#define GPU_PERF_TEST_1(name, param_type) \
struct name : perf::TestBaseWithParam< param_type > \
{ \
public: \
name() {} \
protected: \
void PerfTestBody(); \
}; \
TEST_P(name, perf){ RunPerfTestBody(); } \
void name :: PerfTestBody()
#define GPU_TYPICAL_MAT_SIZES testing::Values(perf::szSXGA, perf::sz1080p, cv::Size(1800, 1500))
cv::Mat readImage(const std::string& fileName, int flags = cv::IMREAD_COLOR);
const std::vector<cv::gpu::DeviceInfo>& devices();
#define ALL_DEVICES testing::ValuesIn(devices())
#define GET_PARAM(k) std::tr1::get< k >(GetParam())
#endif // __OPENCV_PERF_GPU_UTILITY_HPP__

File diff suppressed because it is too large

View File

@ -4,12 +4,19 @@ using namespace std;
using namespace cv;
using namespace cv::gpu;
void fill(Mat& m, double a, double b)
bool runOnGpu = true;
void fillRandom(Mat& m, double a, double b)
{
RNG rng(123456789);
rng.fill(m, RNG::UNIFORM, Scalar::all(a), Scalar::all(b));
}
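// fillRandom seeds the RNG with a fixed constant, so repeated benchmark runs (and the GPU/CPU variants of the
// same test) operate on identical input data.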
Mat readImage(const string& fileName, int flags)
{
return imread(perf::TestBase::getDataPath(fileName), flags);
}
void PrintTo(const CvtColorInfo& info, ostream* os)
{
static const char* str[] =
@ -184,37 +191,3 @@ void PrintTo(const CvtColorInfo& info, ostream* os)
*os << str[info.code];
}
void cv::gpu::PrintTo(const DeviceInfo& info, ostream* os)
{
*os << info.name();
}
Mat readImage(const string& fileName, int flags)
{
return imread(perf::TestBase::getDataPath(fileName), flags);
}
const vector<DeviceInfo>& devices()
{
static vector<DeviceInfo> devs;
static bool first = true;
if (first)
{
int deviceCount = getCudaEnabledDeviceCount();
devs.reserve(deviceCount);
for (int i = 0; i < deviceCount; ++i)
{
DeviceInfo info(i);
if (info.isCompatible())
devs.push_back(info);
}
first = false;
}
return devs;
}

View File

@ -0,0 +1,45 @@
#ifndef __OPENCV_PERF_GPU_UTILITY_HPP__
#define __OPENCV_PERF_GPU_UTILITY_HPP__
#include "opencv2/core/core.hpp"
#include "opencv2/core/gpumat.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/ts/ts_perf.hpp"
extern bool runOnGpu;
void fillRandom(cv::Mat& m, double a = 0.0, double b = 255.0);
cv::Mat readImage(const std::string& fileName, int flags = cv::IMREAD_COLOR);
using perf::MatType;
using perf::MatDepth;
CV_ENUM(BorderMode, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT, cv::BORDER_REFLECT, cv::BORDER_WRAP)
#define ALL_BORDER_MODES testing::ValuesIn(BorderMode::all())
CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::INTER_AREA)
#define ALL_INTERPOLATIONS testing::ValuesIn(Interpolation::all())
CV_ENUM(NormType, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_HAMMING)
struct CvtColorInfo
{
int scn;
int dcn;
int code;
explicit CvtColorInfo(int scn_=0, int dcn_=0, int code_=0) : scn(scn_), dcn(dcn_), code(code_) {}
};
void PrintTo(const CvtColorInfo& info, std::ostream* os);
#define GET_PARAM(k) std::tr1::get< k >(GetParam())
#define DEF_PARAM_TEST(name, ...) typedef ::perf::TestBaseWithParam< std::tr1::tuple< __VA_ARGS__ > > name
#define DEF_PARAM_TEST_1(name, param_type) typedef ::perf::TestBaseWithParam< param_type > name
DEF_PARAM_TEST_1(Sz, cv::Size);
typedef perf::Size_MatType Sz_Type;
DEF_PARAM_TEST(Sz_Depth, cv::Size, MatDepth);
DEF_PARAM_TEST(Sz_Depth_Cn, cv::Size, MatDepth, int);
#define GPU_TYPICAL_MAT_SIZES testing::Values(perf::szSXGA, perf::sz720p, perf::sz1080p)
#endif // __OPENCV_PERF_GPU_UTILITY_HPP__
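Taken together, the helpers above define the shape of every merged benchmark in this patch: a DEF_PARAM_TEST fixture without the old cv::gpu::DeviceInfo element (which is why every GET_PARAM index in the rewritten tests shifts down by one) and a runOnGpu branch inside the test body. The sketch below illustrates that pattern only; the Filters_BoxSketch name and the box-filter calls are hypothetical stand-ins, not part of the patch.
PERF_TEST_P(Sz, Filters_BoxSketch, GPU_TYPICAL_MAT_SIZES)
{
    cv::Size size = GetParam();        // single-parameter fixture, so no tuple indexing is needed
    cv::Mat src(size, CV_8UC1);
    fillRandom(src);
    if (runOnGpu)
    {
        cv::gpu::GpuMat d_src(src);
        cv::gpu::GpuMat d_dst;
        cv::gpu::boxFilter(d_src, d_dst, -1, cv::Size(3, 3));   // first call outside the timed loop, as in the tests above
        TEST_CYCLE()
        {
            cv::gpu::boxFilter(d_src, d_dst, -1, cv::Size(3, 3));
        }
    }
    else
    {
        cv::Mat dst;
        cv::blur(src, dst, cv::Size(3, 3));
        TEST_CYCLE()
        {
            cv::blur(src, dst, cv::Size(3, 3));
        }
    }
}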

View File

@ -1,136 +0,0 @@
#include "perf_cpu_precomp.hpp"
#ifdef HAVE_CUDA
//////////////////////////////////////////////////////////////////////
// StereoBM
GPU_PERF_TEST_1(StereoBM, cv::gpu::DeviceInfo)
{
cv::Mat img_l = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img_l.empty());
cv::Mat img_r = readImage("gpu/perf/aloeR.jpg", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img_r.empty());
cv::StereoBM bm(0, 256);
cv::Mat dst;
bm(img_l, img_r, dst);
declare.time(5.0);
TEST_CYCLE()
{
bm(img_l, img_r, dst);
}
}
INSTANTIATE_TEST_CASE_P(Calib3D, StereoBM, ALL_DEVICES);
//////////////////////////////////////////////////////////////////////
// ProjectPoints
IMPLEMENT_PARAM_CLASS(Count, int)
GPU_PERF_TEST(ProjectPoints, cv::gpu::DeviceInfo, Count)
{
int count = GET_PARAM(1);
cv::Mat src(1, count, CV_32FC3);
fill(src, -100, 100);
cv::Mat rvec = cv::Mat::ones(1, 3, CV_32FC1);
cv::Mat tvec = cv::Mat::ones(1, 3, CV_32FC1);
cv::Mat camera_mat = cv::Mat::ones(3, 3, CV_32FC1);
cv::Mat dst;
cv::projectPoints(src, rvec, tvec, camera_mat, cv::noArray(), dst);
TEST_CYCLE()
{
cv::projectPoints(src, rvec, tvec, camera_mat, cv::noArray(), dst);
}
}
INSTANTIATE_TEST_CASE_P(Calib3D, ProjectPoints, testing::Combine(
ALL_DEVICES,
testing::Values<Count>(5000, 10000, 20000)));
//////////////////////////////////////////////////////////////////////
// SolvePnPRansac
GPU_PERF_TEST(SolvePnPRansac, cv::gpu::DeviceInfo, Count)
{
int count = GET_PARAM(1);
cv::Mat object(1, count, CV_32FC3);
fill(object, -100, 100);
cv::Mat camera_mat(3, 3, CV_32FC1);
fill(camera_mat, 0.5, 1);
camera_mat.at<float>(0, 1) = 0.f;
camera_mat.at<float>(1, 0) = 0.f;
camera_mat.at<float>(2, 0) = 0.f;
camera_mat.at<float>(2, 1) = 0.f;
cv::Mat dist_coef(1, 8, CV_32F, cv::Scalar::all(0));
std::vector<cv::Point2f> image_vec;
cv::Mat rvec_gold(1, 3, CV_32FC1);
fill(rvec_gold, 0, 1);
cv::Mat tvec_gold(1, 3, CV_32FC1);
fill(tvec_gold, 0, 1);
cv::projectPoints(object, rvec_gold, tvec_gold, camera_mat, dist_coef, image_vec);
cv::Mat image(1, count, CV_32FC2, &image_vec[0]);
cv::Mat rvec;
cv::Mat tvec;
cv::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
declare.time(10.0);
TEST_CYCLE()
{
cv::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
}
}
INSTANTIATE_TEST_CASE_P(Calib3D, SolvePnPRansac, testing::Combine(
ALL_DEVICES,
testing::Values<Count>(5000, 10000, 20000)));
//////////////////////////////////////////////////////////////////////
// ReprojectImageTo3D
GPU_PERF_TEST(ReprojectImageTo3D, cv::gpu::DeviceInfo, cv::Size, MatDepth)
{
cv::Size size = GET_PARAM(1);
int depth = GET_PARAM(2);
cv::Mat src(size, depth);
fill(src, 5.0, 30.0);
cv::Mat Q(4, 4, CV_32FC1);
fill(Q, 0.1, 1.0);
cv::Mat dst;
cv::reprojectImageTo3D(src, dst, Q);
TEST_CYCLE()
{
cv::reprojectImageTo3D(src, dst, Q);
}
}
INSTANTIATE_TEST_CASE_P(Calib3D, ReprojectImageTo3D, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values<MatDepth>(CV_8U, CV_16S)));
#endif

File diff suppressed because it is too large

View File

@ -1 +0,0 @@
#include "perf_cpu_precomp.hpp"

View File

@ -1,32 +0,0 @@
#ifdef __GNUC__
# pragma GCC diagnostic ignored "-Wmissing-declarations"
# pragma GCC diagnostic ignored "-Wmissing-prototypes" //OSX
#endif
#ifndef __OPENCV_PERF_CPU_PRECOMP_HPP__
#define __OPENCV_PERF_CPU_PRECOMP_HPP__
#include <cstdio>
#include <iostream>
#include "cvconfig.h"
#include "opencv2/ts/ts.hpp"
#include "opencv2/ts/ts_perf.hpp"
#include "opencv2/core/core.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/gpu/gpu.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/video/video.hpp"
#include "opencv2/calib3d/calib3d.hpp"
#include "opencv2/nonfree/nonfree.hpp"
#include "opencv2/legacy/legacy.hpp"
#include "perf_utility.hpp"
#ifdef GTEST_CREATE_SHARED_LIBRARY
#error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
#endif
#endif

View File

@ -1,187 +0,0 @@
#include "perf_cpu_precomp.hpp"
#ifdef HAVE_CUDA
//////////////////////////////////////////////////////////////////////
// SURF
GPU_PERF_TEST_1(SURF, cv::gpu::DeviceInfo)
{
cv::Mat img = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
cv::SURF surf;
std::vector<cv::KeyPoint> keypoints;
cv::Mat descriptors;
surf(img, cv::noArray(), keypoints, descriptors);
declare.time(50.0);
TEST_CYCLE()
{
keypoints.clear();
surf(img, cv::noArray(), keypoints, descriptors);
}
}
INSTANTIATE_TEST_CASE_P(Features2D, SURF, ALL_DEVICES);
//////////////////////////////////////////////////////////////////////
// FAST
GPU_PERF_TEST_1(FAST, cv::gpu::DeviceInfo)
{
cv::Mat img = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
std::vector<cv::KeyPoint> keypoints;
cv::FAST(img, keypoints, 20);
TEST_CYCLE()
{
keypoints.clear();
cv::FAST(img, keypoints, 20);
}
}
INSTANTIATE_TEST_CASE_P(Features2D, FAST, ALL_DEVICES);
//////////////////////////////////////////////////////////////////////
// ORB
GPU_PERF_TEST_1(ORB, cv::gpu::DeviceInfo)
{
cv::Mat img = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
cv::ORB orb(4000);
std::vector<cv::KeyPoint> keypoints;
cv::Mat descriptors;
orb(img, cv::noArray(), keypoints, descriptors);
TEST_CYCLE()
{
keypoints.clear();
orb(img, cv::noArray(), keypoints, descriptors);
}
}
INSTANTIATE_TEST_CASE_P(Features2D, ORB, ALL_DEVICES);
//////////////////////////////////////////////////////////////////////
// BruteForceMatcher_match
IMPLEMENT_PARAM_CLASS(DescriptorSize, int)
GPU_PERF_TEST(BruteForceMatcher_match, cv::gpu::DeviceInfo, DescriptorSize, NormType)
{
int desc_size = GET_PARAM(1);
int normType = GET_PARAM(2);
int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;
cv::Mat query(3000, desc_size, type);
fill(query, 0.0, 10.0);
cv::Mat train(3000, desc_size, type);
fill(train, 0.0, 10.0);
cv::BFMatcher matcher(normType);
std::vector<cv::DMatch> matches;
matcher.match(query, train, matches);
declare.time(20.0);
TEST_CYCLE()
{
matcher.match(query, train, matches);
}
}
INSTANTIATE_TEST_CASE_P(Features2D, BruteForceMatcher_match, testing::Combine(
ALL_DEVICES,
testing::Values(DescriptorSize(64), DescriptorSize(128), DescriptorSize(256)),
testing::Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))));
//////////////////////////////////////////////////////////////////////
// BruteForceMatcher_knnMatch
IMPLEMENT_PARAM_CLASS(K, int)
GPU_PERF_TEST(BruteForceMatcher_knnMatch, cv::gpu::DeviceInfo, DescriptorSize, K, NormType)
{
int desc_size = GET_PARAM(1);
int k = GET_PARAM(2);
int normType = GET_PARAM(3);
int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;
cv::Mat query(3000, desc_size, type);
fill(query, 0.0, 10.0);
cv::Mat train(3000, desc_size, type);
fill(train, 0.0, 10.0);
cv::BFMatcher matcher(normType);
std::vector< std::vector<cv::DMatch> > matches;
matcher.knnMatch(query, train, matches, k);
declare.time(30.0);
TEST_CYCLE()
{
matcher.knnMatch(query, train, matches, k);
}
}
INSTANTIATE_TEST_CASE_P(Features2D, BruteForceMatcher_knnMatch, testing::Combine(
ALL_DEVICES,
testing::Values(DescriptorSize(64), DescriptorSize(128), DescriptorSize(256)),
testing::Values(K(2), K(3)),
testing::Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))));
//////////////////////////////////////////////////////////////////////
// BruteForceMatcher_radiusMatch
GPU_PERF_TEST(BruteForceMatcher_radiusMatch, cv::gpu::DeviceInfo, DescriptorSize, NormType)
{
int desc_size = GET_PARAM(1);
int normType = GET_PARAM(2);
int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;
cv::Mat query(3000, desc_size, type);
fill(query, 0.0, 1.0);
cv::Mat train(3000, desc_size, type);
fill(train, 0.0, 1.0);
cv::BFMatcher matcher(normType);
std::vector< std::vector<cv::DMatch> > matches;
matcher.radiusMatch(query, train, matches, 2.0);
declare.time(30.0);
TEST_CYCLE()
{
matcher.radiusMatch(query, train, matches, 2.0);
}
}
INSTANTIATE_TEST_CASE_P(Features2D, BruteForceMatcher_radiusMatch, testing::Combine(
ALL_DEVICES,
testing::Values(DescriptorSize(64), DescriptorSize(128), DescriptorSize(256)),
testing::Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))));
#endif

View File

@ -1,283 +0,0 @@
#include "perf_cpu_precomp.hpp"
#ifdef HAVE_CUDA
IMPLEMENT_PARAM_CLASS(KernelSize, int)
//////////////////////////////////////////////////////////////////////
// Blur
GPU_PERF_TEST(Blur, cv::gpu::DeviceInfo, cv::Size, MatType, KernelSize)
{
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
int ksize = GET_PARAM(3);
cv::Mat src(size, type);
fill(src, 0.0, 255.0);
cv::Mat dst;
cv::blur(src, dst, cv::Size(ksize, ksize));
declare.time(20.0);
TEST_CYCLE()
{
cv::blur(src, dst, cv::Size(ksize, ksize));
}
}
INSTANTIATE_TEST_CASE_P(Filters, Blur, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)),
testing::Values(KernelSize(3), KernelSize(5), KernelSize(7))));
//////////////////////////////////////////////////////////////////////
// Sobel
GPU_PERF_TEST(Sobel, cv::gpu::DeviceInfo, cv::Size, MatType, KernelSize)
{
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
int ksize = GET_PARAM(3);
cv::Mat src(size, type);
fill(src, 0.0, 255.0);
cv::Mat dst;
cv::Sobel(src, dst, -1, 1, 1, ksize);
declare.time(20.0);
TEST_CYCLE()
{
cv::Sobel(src, dst, -1, 1, 1, ksize);
}
}
INSTANTIATE_TEST_CASE_P(Filters, Sobel, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1)),
testing::Values(KernelSize(3), KernelSize(5), KernelSize(7), KernelSize(9), KernelSize(11), KernelSize(13), KernelSize(15))));
//////////////////////////////////////////////////////////////////////
// Scharr
GPU_PERF_TEST(Scharr, cv::gpu::DeviceInfo, cv::Size, MatType)
{
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
cv::Mat src(size, type);
fill(src, 0.0, 255.0);
cv::Mat dst;
cv::Scharr(src, dst, -1, 1, 0);
declare.time(20.0);
TEST_CYCLE()
{
cv::Scharr(src, dst, -1, 1, 0);
}
}
INSTANTIATE_TEST_CASE_P(Filters, Scharr, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1))));
//////////////////////////////////////////////////////////////////////
// GaussianBlur
GPU_PERF_TEST(GaussianBlur, cv::gpu::DeviceInfo, cv::Size, MatType, KernelSize)
{
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
int ksize = GET_PARAM(3);
cv::Mat src(size, type);
fill(src, 0.0, 255.0);
cv::Mat dst;
cv::GaussianBlur(src, dst, cv::Size(ksize, ksize), 0.5);
declare.time(20.0);
TEST_CYCLE()
{
cv::GaussianBlur(src, dst, cv::Size(ksize, ksize), 0.5);
}
}
INSTANTIATE_TEST_CASE_P(Filters, GaussianBlur, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1)),
testing::Values(KernelSize(3), KernelSize(5), KernelSize(7), KernelSize(9), KernelSize(11), KernelSize(13), KernelSize(15))));
//////////////////////////////////////////////////////////////////////
// Laplacian
GPU_PERF_TEST(Laplacian, cv::gpu::DeviceInfo, cv::Size, MatType, KernelSize)
{
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
int ksize = GET_PARAM(3);
cv::Mat src(size, type);
fill(src, 0.0, 255.0);
cv::Mat dst;
cv::Laplacian(src, dst, -1, ksize);
declare.time(20.0);
TEST_CYCLE()
{
cv::Laplacian(src, dst, -1, ksize);
}
}
INSTANTIATE_TEST_CASE_P(Filters, Laplacian, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC4)),
testing::Values(KernelSize(1), KernelSize(3))));
//////////////////////////////////////////////////////////////////////
// Erode
GPU_PERF_TEST(Erode, cv::gpu::DeviceInfo, cv::Size, MatType)
{
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
cv::Mat src(size, type);
fill(src, 0.0, 255.0);
cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
cv::Mat dst;
cv::erode(src, dst, ker);
declare.time(20.0);
TEST_CYCLE()
{
cv::erode(src, dst, ker);
}
}
INSTANTIATE_TEST_CASE_P(Filters, Erode, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4))));
//////////////////////////////////////////////////////////////////////
// Dilate
GPU_PERF_TEST(Dilate, cv::gpu::DeviceInfo, cv::Size, MatType)
{
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
cv::Mat src(size, type);
fill(src, 0.0, 255.0);
cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
cv::Mat dst;
cv::dilate(src, dst, ker);
declare.time(20.0);
TEST_CYCLE()
{
cv::dilate(src, dst, ker);
}
}
INSTANTIATE_TEST_CASE_P(Filters, Dilate, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4))));
//////////////////////////////////////////////////////////////////////
// MorphologyEx
CV_ENUM(MorphOp, cv::MORPH_OPEN, cv::MORPH_CLOSE, cv::MORPH_GRADIENT, cv::MORPH_TOPHAT, cv::MORPH_BLACKHAT)
#define ALL_MORPH_OPS testing::Values(MorphOp(cv::MORPH_OPEN), MorphOp(cv::MORPH_CLOSE), MorphOp(cv::MORPH_GRADIENT), MorphOp(cv::MORPH_TOPHAT), MorphOp(cv::MORPH_BLACKHAT))
GPU_PERF_TEST(MorphologyEx, cv::gpu::DeviceInfo, cv::Size, MatType, MorphOp)
{
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
int morphOp = GET_PARAM(3);
cv::Mat src(size, type);
fill(src, 0.0, 255.0);
cv::Mat dst;
cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
cv::morphologyEx(src, dst, morphOp, ker);
declare.time(20.0);
TEST_CYCLE()
{
cv::morphologyEx(src, dst, morphOp, ker);
}
}
INSTANTIATE_TEST_CASE_P(Filters, MorphologyEx, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)),
ALL_MORPH_OPS));
//////////////////////////////////////////////////////////////////////
// Filter2D
GPU_PERF_TEST(Filter2D, cv::gpu::DeviceInfo, cv::Size, MatType, KernelSize)
{
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
int ksize = GET_PARAM(3);
cv::Mat src(size, type);
fill(src, 0.0, 255.0);
cv::Mat kernel(ksize, ksize, CV_32FC1);
fill(kernel, 0.0, 1.0);
cv::Mat dst;
cv::filter2D(src, dst, -1, kernel);
declare.time(20.0);
TEST_CYCLE()
{
cv::filter2D(src, dst, -1, kernel);
}
}
INSTANTIATE_TEST_CASE_P(Filters, Filter2D, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC4)),
testing::Values(KernelSize(3), KernelSize(5), KernelSize(7), KernelSize(9), KernelSize(11), KernelSize(13), KernelSize(15))));
#endif

View File

@ -1,771 +0,0 @@
#include "perf_cpu_precomp.hpp"
#ifdef HAVE_CUDA
//////////////////////////////////////////////////////////////////////
// Remap
GPU_PERF_TEST(Remap, cv::gpu::DeviceInfo, cv::Size, MatType, Interpolation, BorderMode)
{
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
int interpolation = GET_PARAM(3);
int borderMode = GET_PARAM(4);
cv::Mat src(size, type);
fill(src, 0, 255);
cv::Mat xmap(size, CV_32FC1);
fill(xmap, 0, size.width);
cv::Mat ymap(size, CV_32FC1);
fill(ymap, 0, size.height);
cv::Mat dst;
cv::remap(src, dst, xmap, ymap, interpolation, borderMode);
declare.time(20.0);
TEST_CYCLE()
{
cv::remap(src, dst, xmap, ymap, interpolation, borderMode);
}
}
INSTANTIATE_TEST_CASE_P(ImgProc, Remap, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
testing::Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_CONSTANT), BorderMode(cv::BORDER_REFLECT), BorderMode(cv::BORDER_WRAP))));
//////////////////////////////////////////////////////////////////////
// Resize
IMPLEMENT_PARAM_CLASS(Scale, double)
GPU_PERF_TEST(Resize, cv::gpu::DeviceInfo, cv::Size, MatType, Interpolation, Scale)
{
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
int interpolation = GET_PARAM(3);
double f = GET_PARAM(4);
cv::Mat src(size, type);
fill(src, 0, 255);
cv::Mat dst;
cv::resize(src, dst, cv::Size(), f, f, interpolation);
declare.time(20.0);
TEST_CYCLE()
{
cv::resize(src, dst, cv::Size(), f, f, interpolation);
}
}
INSTANTIATE_TEST_CASE_P(ImgProc, Resize, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR),
Interpolation(cv::INTER_CUBIC), Interpolation(cv::INTER_AREA)),
testing::Values(Scale(0.5), Scale(0.3), Scale(2.0))));
GPU_PERF_TEST(ResizeArea, cv::gpu::DeviceInfo, cv::Size, MatType, Scale)
{
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
int interpolation = cv::INTER_AREA;
double f = GET_PARAM(3);
cv::Mat src_host(size, type);
fill(src_host, 0, 255);
cv::Mat src(src_host);
cv::Mat dst;
cv::resize(src, dst, cv::Size(), f, f, interpolation);
declare.time(1.0);
TEST_CYCLE()
{
cv::resize(src, dst, cv::Size(), f, f, interpolation);
}
}
INSTANTIATE_TEST_CASE_P(ImgProc, ResizeArea, testing::Combine(
ALL_DEVICES,
testing::Values(perf::sz1080p, cv::Size(4096, 2048)),
testing::Values(MatType(CV_8UC1)/*, MatType(CV_8UC3), MatType(CV_8UC4),
MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)*/),
testing::Values(Scale(0.2),Scale(0.1),Scale(0.05))));
//////////////////////////////////////////////////////////////////////
// WarpAffine
GPU_PERF_TEST(WarpAffine, cv::gpu::DeviceInfo, cv::Size, MatType, Interpolation, BorderMode)
{
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
int interpolation = GET_PARAM(3);
int borderMode = GET_PARAM(4);
cv::Mat src(size, type);
fill(src, 0, 255);
cv::Mat dst;
const double aplha = CV_PI / 4;
double mat[2][3] = { {std::cos(aplha), -std::sin(aplha), src.cols / 2},
{std::sin(aplha), std::cos(aplha), 0}};
cv::Mat M(2, 3, CV_64F, (void*) mat);
cv::warpAffine(src, dst, M, size, interpolation, borderMode);
declare.time(20.0);
TEST_CYCLE()
{
cv::warpAffine(src, dst, M, size, interpolation, borderMode);
}
}
INSTANTIATE_TEST_CASE_P(ImgProc, WarpAffine, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
testing::Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_CONSTANT), BorderMode(cv::BORDER_REFLECT), BorderMode(cv::BORDER_WRAP))));
//////////////////////////////////////////////////////////////////////
// WarpPerspective
GPU_PERF_TEST(WarpPerspective, cv::gpu::DeviceInfo, cv::Size, MatType, Interpolation, BorderMode)
{
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
int interpolation = GET_PARAM(3);
int borderMode = GET_PARAM(4);
cv::Mat src(size, type);
fill(src, 0, 255);
cv::Mat dst;
const double aplha = CV_PI / 4;
double mat[3][3] = { {std::cos(aplha), -std::sin(aplha), src.cols / 2},
{std::sin(aplha), std::cos(aplha), 0},
{0.0, 0.0, 1.0}};
cv::Mat M(3, 3, CV_64F, (void*) mat);
cv::warpPerspective(src, dst, M, size, interpolation, borderMode);
declare.time(20.0);
TEST_CYCLE()
{
cv::warpPerspective(src, dst, M, size, interpolation, borderMode);
}
}
INSTANTIATE_TEST_CASE_P(ImgProc, WarpPerspective, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
testing::Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_CONSTANT), BorderMode(cv::BORDER_REFLECT), BorderMode(cv::BORDER_WRAP))));
//////////////////////////////////////////////////////////////////////
// CopyMakeBorder
GPU_PERF_TEST(CopyMakeBorder, cv::gpu::DeviceInfo, cv::Size, MatType, BorderMode)
{
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
int borderType = GET_PARAM(3);
cv::Mat src(size, type);
fill(src, 0, 255);
cv::Mat dst;
cv::copyMakeBorder(src, dst, 5, 5, 5, 5, borderType);
TEST_CYCLE()
{
cv::copyMakeBorder(src, dst, 5, 5, 5, 5, borderType);
}
}
INSTANTIATE_TEST_CASE_P(ImgProc, CopyMakeBorder, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
testing::Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_CONSTANT), BorderMode(cv::BORDER_REFLECT), BorderMode(cv::BORDER_WRAP))));
//////////////////////////////////////////////////////////////////////
// Threshold
CV_ENUM(ThreshOp, cv::THRESH_BINARY, cv::THRESH_BINARY_INV, cv::THRESH_TRUNC, cv::THRESH_TOZERO, cv::THRESH_TOZERO_INV)
#define ALL_THRESH_OPS testing::Values(ThreshOp(cv::THRESH_BINARY), ThreshOp(cv::THRESH_BINARY_INV), ThreshOp(cv::THRESH_TRUNC), ThreshOp(cv::THRESH_TOZERO), ThreshOp(cv::THRESH_TOZERO_INV))
GPU_PERF_TEST(Threshold, cv::gpu::DeviceInfo, cv::Size, MatDepth, ThreshOp)
{
cv::Size size = GET_PARAM(1);
int depth = GET_PARAM(2);
int threshOp = GET_PARAM(3);
cv::Mat src(size, depth);
fill(src, 0, 255);
cv::Mat dst;
cv::threshold(src, dst, 100.0, 255.0, threshOp);
TEST_CYCLE()
{
cv::threshold(src, dst, 100.0, 255.0, threshOp);
}
}
INSTANTIATE_TEST_CASE_P(ImgProc, Threshold, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F), MatDepth(CV_64F)),
ALL_THRESH_OPS));
//////////////////////////////////////////////////////////////////////
// Integral
GPU_PERF_TEST(Integral, cv::gpu::DeviceInfo, cv::Size)
{
cv::Size size = GET_PARAM(1);
cv::Mat src(size, CV_8UC1);
fill(src, 0, 255);
cv::Mat dst;
cv::integral(src, dst);
TEST_CYCLE()
{
cv::integral(src, dst);
}
}
INSTANTIATE_TEST_CASE_P(ImgProc, Integral, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES));
//////////////////////////////////////////////////////////////////////
// HistEven_OneChannel
GPU_PERF_TEST(HistEven_OneChannel, cv::gpu::DeviceInfo, cv::Size, MatDepth)
{
cv::Size size = GET_PARAM(1);
int depth = GET_PARAM(2);
cv::Mat src(size, depth);
fill(src, 0, 255);
int hbins = 30;
float hranges[] = {0.0f, 180.0f};
cv::Mat hist;
int histSize[] = {hbins};
const float* ranges[] = {hranges};
int channels[] = {0};
cv::calcHist(&src, 1, channels, cv::Mat(), hist, 1, histSize, ranges);
TEST_CYCLE()
{
cv::calcHist(&src, 1, channels, cv::Mat(), hist, 1, histSize, ranges);
}
}
INSTANTIATE_TEST_CASE_P(ImgProc, HistEven_OneChannel, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_16S))));
//////////////////////////////////////////////////////////////////////
// EqualizeHist
GPU_PERF_TEST(EqualizeHist, cv::gpu::DeviceInfo, cv::Size)
{
cv::Size size = GET_PARAM(1);
cv::Mat src(size, CV_8UC1);
fill(src, 0, 255);
cv::Mat dst;
cv::equalizeHist(src, dst);
TEST_CYCLE()
{
cv::equalizeHist(src, dst);
}
}
INSTANTIATE_TEST_CASE_P(ImgProc, EqualizeHist, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES));
//////////////////////////////////////////////////////////////////////
// Canny
IMPLEMENT_PARAM_CLASS(AppertureSize, int)
IMPLEMENT_PARAM_CLASS(L2gradient, bool)
GPU_PERF_TEST(Canny, cv::gpu::DeviceInfo, AppertureSize, L2gradient)
{
int apperture_size = GET_PARAM(1);
bool useL2gradient = GET_PARAM(2);
cv::Mat image = readImage("perf/1280x1024.jpg", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(image.empty());
cv::Mat dst;
cv::Canny(image, dst, 50.0, 100.0, apperture_size, useL2gradient);
TEST_CYCLE()
{
cv::Canny(image, dst, 50.0, 100.0, apperture_size, useL2gradient);
}
}
INSTANTIATE_TEST_CASE_P(ImgProc, Canny, testing::Combine(
ALL_DEVICES,
testing::Values(AppertureSize(3), AppertureSize(5)),
testing::Values(L2gradient(false), L2gradient(true))));
//////////////////////////////////////////////////////////////////////
// MeanShiftFiltering
GPU_PERF_TEST_1(MeanShiftFiltering, cv::gpu::DeviceInfo)
{
cv::Mat img = readImage("gpu/meanshift/cones.png");
ASSERT_FALSE(img.empty());
cv::Mat dst;
cv::pyrMeanShiftFiltering(img, dst, 50, 50);
declare.time(15.0);
TEST_CYCLE()
{
cv::pyrMeanShiftFiltering(img, dst, 50, 50);
}
}
INSTANTIATE_TEST_CASE_P(ImgProc, MeanShiftFiltering, ALL_DEVICES);
//////////////////////////////////////////////////////////////////////
// Convolve
IMPLEMENT_PARAM_CLASS(KSize, int)
IMPLEMENT_PARAM_CLASS(Ccorr, bool)
GPU_PERF_TEST(Convolve, cv::gpu::DeviceInfo, cv::Size, KSize, Ccorr)
{
cv::Size size = GET_PARAM(1);
int templ_size = GET_PARAM(2);
bool ccorr = GET_PARAM(3);
ASSERT_FALSE(ccorr);
cv::Mat image(size, CV_32FC1);
image.setTo(1.0);
cv::Mat templ(templ_size, templ_size, CV_32FC1);
templ.setTo(1.0);
cv::Mat dst;
cv::filter2D(image, dst, image.depth(), templ);
declare.time(10.0);
TEST_CYCLE()
{
cv::filter2D(image, dst, image.depth(), templ);
}
}
INSTANTIATE_TEST_CASE_P(ImgProc, Convolve, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(KSize(3), KSize(9), KSize(17), KSize(27), KSize(32), KSize(64)),
testing::Values(Ccorr(false), Ccorr(true))));
////////////////////////////////////////////////////////////////////////////////
// MatchTemplate_8U
CV_ENUM(TemplateMethod, cv::TM_SQDIFF, cv::TM_SQDIFF_NORMED, cv::TM_CCORR, cv::TM_CCORR_NORMED, cv::TM_CCOEFF, cv::TM_CCOEFF_NORMED)
#define ALL_TEMPLATE_METHODS testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_SQDIFF_NORMED), TemplateMethod(cv::TM_CCORR), TemplateMethod(cv::TM_CCORR_NORMED), TemplateMethod(cv::TM_CCOEFF), TemplateMethod(cv::TM_CCOEFF_NORMED))
IMPLEMENT_PARAM_CLASS(TemplateSize, cv::Size)
GPU_PERF_TEST(MatchTemplate_8U, cv::gpu::DeviceInfo, cv::Size, TemplateSize, Channels, TemplateMethod)
{
cv::Size size = GET_PARAM(1);
cv::Size templ_size = GET_PARAM(2);
int cn = GET_PARAM(3);
int method = GET_PARAM(4);
cv::Mat image(size, CV_MAKE_TYPE(CV_8U, cn));
fill(image, 0, 255);
cv::Mat templ(templ_size, CV_MAKE_TYPE(CV_8U, cn));
fill(templ, 0, 255);
cv::Mat dst;
cv::matchTemplate(image, templ, dst, method);
TEST_CYCLE()
{
cv::matchTemplate(image, templ, dst, method);
}
};
INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate_8U, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16)), TemplateSize(cv::Size(30, 30))),
testing::Values(Channels(1), Channels(3), Channels(4)),
ALL_TEMPLATE_METHODS));
////////////////////////////////////////////////////////////////////////////////
// MatchTemplate_32F
GPU_PERF_TEST(MatchTemplate_32F, cv::gpu::DeviceInfo, cv::Size, TemplateSize, Channels, TemplateMethod)
{
cv::Size size = GET_PARAM(1);
cv::Size templ_size = GET_PARAM(2);
int cn = GET_PARAM(3);
int method = GET_PARAM(4);
cv::Mat image(size, CV_MAKE_TYPE(CV_32F, cn));
fill(image, 0, 255);
cv::Mat templ(templ_size, CV_MAKE_TYPE(CV_32F, cn));
fill(templ, 0, 255);
cv::Mat dst;
cv::matchTemplate(image, templ, dst, method);
TEST_CYCLE()
{
cv::matchTemplate(image, templ, dst, method);
}
};
INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate_32F, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16)), TemplateSize(cv::Size(30, 30))),
testing::Values(Channels(1), Channels(3), Channels(4)),
testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR))));
//////////////////////////////////////////////////////////////////////
// MulSpectrums
CV_FLAGS(DftFlags, 0, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMPLEX_OUTPUT, cv::DFT_REAL_OUTPUT)
GPU_PERF_TEST(MulSpectrums, cv::gpu::DeviceInfo, cv::Size, DftFlags)
{
cv::Size size = GET_PARAM(1);
int flag = GET_PARAM(2);
cv::Mat a(size, CV_32FC2);
fill(a, 0, 100);
cv::Mat b(size, CV_32FC2);
fill(b, 0, 100);
cv::Mat dst;
cv::mulSpectrums(a, b, dst, flag);
TEST_CYCLE()
{
cv::mulSpectrums(a, b, dst, flag);
}
}
INSTANTIATE_TEST_CASE_P(ImgProc, MulSpectrums, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(DftFlags(0), DftFlags(cv::DFT_ROWS))));
//////////////////////////////////////////////////////////////////////
// Dft
GPU_PERF_TEST(Dft, cv::gpu::DeviceInfo, cv::Size, DftFlags)
{
cv::Size size = GET_PARAM(1);
int flag = GET_PARAM(2);
cv::Mat src(size, CV_32FC2);
fill(src, 0, 100);
cv::Mat dst;
cv::dft(src, dst, flag);
declare.time(10.0);
TEST_CYCLE()
{
cv::dft(src, dst, flag);
}
}
INSTANTIATE_TEST_CASE_P(ImgProc, Dft, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(DftFlags(0), DftFlags(cv::DFT_ROWS), DftFlags(cv::DFT_INVERSE))));
//////////////////////////////////////////////////////////////////////
// CornerHarris
IMPLEMENT_PARAM_CLASS(BlockSize, int)
IMPLEMENT_PARAM_CLASS(ApertureSize, int)
GPU_PERF_TEST(CornerHarris, cv::gpu::DeviceInfo, MatType, BorderMode, BlockSize, ApertureSize)
{
int type = GET_PARAM(1);
int borderType = GET_PARAM(2);
int blockSize = GET_PARAM(3);
int apertureSize = GET_PARAM(4);
cv::Mat img = readImage("gpu/stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
img.convertTo(img, type, type == CV_32F ? 1.0 / 255.0 : 1.0);
cv::Mat dst;
double k = 0.5;
cv::cornerHarris(img, dst, blockSize, apertureSize, k, borderType);
TEST_CYCLE()
{
cv::cornerHarris(img, dst, blockSize, apertureSize, k, borderType);
}
}
INSTANTIATE_TEST_CASE_P(ImgProc, CornerHarris, testing::Combine(
ALL_DEVICES,
testing::Values(MatType(CV_8UC1), MatType(CV_32FC1)),
testing::Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_REFLECT)),
testing::Values(BlockSize(3), BlockSize(5), BlockSize(7)),
testing::Values(ApertureSize(0), ApertureSize(3), ApertureSize(5), ApertureSize(7))));
//////////////////////////////////////////////////////////////////////
// CornerMinEigenVal
GPU_PERF_TEST(CornerMinEigenVal, cv::gpu::DeviceInfo, MatType, BorderMode, BlockSize, ApertureSize)
{
int type = GET_PARAM(1);
int borderType = GET_PARAM(2);
int blockSize = GET_PARAM(3);
int apertureSize = GET_PARAM(4);
cv::Mat img = readImage("gpu/stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
img.convertTo(img, type, type == CV_32F ? 1.0 / 255.0 : 1.0);
cv::Mat dst;
cv::cornerMinEigenVal(img, dst, blockSize, apertureSize, borderType);
TEST_CYCLE()
{
cv::cornerMinEigenVal(img, dst, blockSize, apertureSize, borderType);
}
}
INSTANTIATE_TEST_CASE_P(ImgProc, CornerMinEigenVal, testing::Combine(
ALL_DEVICES,
testing::Values(MatType(CV_8UC1), MatType(CV_32FC1)),
testing::Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_REFLECT)),
testing::Values(BlockSize(3), BlockSize(5), BlockSize(7)),
testing::Values(ApertureSize(0), ApertureSize(3), ApertureSize(5), ApertureSize(7))));
//////////////////////////////////////////////////////////////////////
// PyrDown
GPU_PERF_TEST(PyrDown, cv::gpu::DeviceInfo, cv::Size, MatType)
{
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
cv::Mat src(size, type);
fill(src, 0, 255);
cv::Mat dst;
cv::pyrDown(src, dst);
TEST_CYCLE()
{
cv::pyrDown(src, dst);
}
}
INSTANTIATE_TEST_CASE_P(ImgProc, PyrDown, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4))));
//////////////////////////////////////////////////////////////////////
// PyrUp
GPU_PERF_TEST(PyrUp, cv::gpu::DeviceInfo, cv::Size, MatType)
{
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
cv::Mat src(size, type);
fill(src, 0, 255);
cv::Mat dst;
cv::pyrUp(src, dst);
TEST_CYCLE()
{
cv::pyrUp(src, dst);
}
}
INSTANTIATE_TEST_CASE_P(ImgProc, PyrUp, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4))));
//////////////////////////////////////////////////////////////////////
// CvtColor
GPU_PERF_TEST(CvtColor, cv::gpu::DeviceInfo, cv::Size, MatDepth, CvtColorInfo)
{
cv::Size size = GET_PARAM(1);
int depth = GET_PARAM(2);
CvtColorInfo info = GET_PARAM(3);
cv::Mat src(size, CV_MAKETYPE(depth, info.scn));
fill(src, 0, 255);
cv::Mat dst;
cv::cvtColor(src, dst, info.code, info.dcn);
TEST_CYCLE()
{
cv::cvtColor(src, dst, info.code, info.dcn);
}
}
INSTANTIATE_TEST_CASE_P(ImgProc, CvtColor, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F)),
testing::Values(CvtColorInfo(4, 4, cv::COLOR_RGBA2BGRA),
CvtColorInfo(4, 1, cv::COLOR_BGRA2GRAY),
CvtColorInfo(1, 4, cv::COLOR_GRAY2BGRA),
CvtColorInfo(3, 3, cv::COLOR_BGR2XYZ),
CvtColorInfo(3, 3, cv::COLOR_XYZ2BGR),
CvtColorInfo(3, 3, cv::COLOR_BGR2YCrCb),
CvtColorInfo(3, 3, cv::COLOR_YCrCb2BGR),
CvtColorInfo(3, 3, cv::COLOR_BGR2YUV),
CvtColorInfo(3, 3, cv::COLOR_YUV2BGR),
CvtColorInfo(3, 3, cv::COLOR_BGR2HSV),
CvtColorInfo(3, 3, cv::COLOR_HSV2BGR),
CvtColorInfo(3, 3, cv::COLOR_BGR2HLS),
CvtColorInfo(3, 3, cv::COLOR_HLS2BGR),
CvtColorInfo(3, 3, cv::COLOR_BGR2Lab),
CvtColorInfo(3, 3, cv::COLOR_RGB2Lab),
CvtColorInfo(3, 3, cv::COLOR_BGR2Luv),
CvtColorInfo(3, 3, cv::COLOR_RGB2Luv),
CvtColorInfo(3, 3, cv::COLOR_Lab2BGR),
CvtColorInfo(3, 3, cv::COLOR_Lab2RGB),
CvtColorInfo(3, 3, cv::COLOR_Luv2BGR),
CvtColorInfo(3, 3, cv::COLOR_Luv2RGB),
CvtColorInfo(1, 3, cv::COLOR_BayerBG2BGR),
CvtColorInfo(1, 3, cv::COLOR_BayerGB2BGR),
CvtColorInfo(1, 3, cv::COLOR_BayerRG2BGR),
CvtColorInfo(1, 3, cv::COLOR_BayerGR2BGR),
CvtColorInfo(4, 4, cv::COLOR_RGBA2mRGBA))));
//////////////////////////////////////////////////////////////////////
// HoughLines
IMPLEMENT_PARAM_CLASS(DoSort, bool)
GPU_PERF_TEST(HoughLines, cv::gpu::DeviceInfo, cv::Size, DoSort)
{
declare.time(30.0);
const cv::Size size = GET_PARAM(1);
const float rho = 1.0f;
const float theta = CV_PI / 180.0f;
const int threshold = 300;
cv::RNG rng(123456789);
cv::Mat src(size, CV_8UC1, cv::Scalar::all(0));
const int numLines = rng.uniform(500, 2000);
for (int i = 0; i < numLines; ++i)
{
cv::Point p1(rng.uniform(0, src.cols), rng.uniform(0, src.rows));
cv::Point p2(rng.uniform(0, src.cols), rng.uniform(0, src.rows));
cv::line(src, p1, p2, cv::Scalar::all(255), 2);
}
std::vector<cv::Vec2f> lines;
cv::HoughLines(src, lines, rho, theta, threshold);
TEST_CYCLE()
{
cv::HoughLines(src, lines, rho, theta, threshold);
}
}
INSTANTIATE_TEST_CASE_P(ImgProc, HoughLines, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(DoSort(false), DoSort(true))));
#endif

View File

@ -1,158 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//M*/
#include "perf_precomp.hpp"
#ifdef HAVE_CUDA
namespace {
struct GreedyLabeling
{
struct dot
{
int x;
int y;
static dot make(int i, int j)
{
dot d; d.x = i; d.y = j;
return d;
}
};
struct InInterval
{
InInterval(const int& _lo, const int& _hi) : lo(-_lo), hi(_hi) {};
const int lo, hi;
bool operator() (const unsigned char a, const unsigned char b) const
{
int d = a - b;
return lo <= d && d <= hi;
}
};
GreedyLabeling(cv::Mat img)
: image(img), _labels(image.size(), CV_32SC1, cv::Scalar::all(-1)) {stack = new dot[image.cols * image.rows];}
~GreedyLabeling(){delete[] stack;}
void operator() (cv::Mat labels) const
{
labels.setTo(cv::Scalar::all(-1));
InInterval inInt(0, 2);
int cc = -1;
int* dist_labels = (int*)labels.data;
int pitch = labels.step1();
unsigned char* source = (unsigned char*)image.data;
int width = image.cols;
int height = image.rows;
for (int j = 0; j < image.rows; ++j)
for (int i = 0; i < image.cols; ++i)
{
if (dist_labels[j * pitch + i] != -1) continue;
dot* top = stack;
dot p = dot::make(i, j);
cc++;
dist_labels[j * pitch + i] = cc;
while (top >= stack)
{
int* dl = &dist_labels[p.y * pitch + p.x];
unsigned char* sp = &source[p.y * image.step1() + p.x];
dl[0] = cc;
//right
if( p.x < (width - 1) && dl[ +1] == -1 && inInt(sp[0], sp[+1]))
*top++ = dot::make(p.x + 1, p.y);
//left
if( p.x > 0 && dl[-1] == -1 && inInt(sp[0], sp[-1]))
*top++ = dot::make(p.x - 1, p.y);
//bottom
if( p.y < (height - 1) && dl[+pitch] == -1 && inInt(sp[0], sp[+image.step1()]))
*top++ = dot::make(p.x, p.y + 1);
//top
if( p.y > 0 && dl[-pitch] == -1 && inInt(sp[0], sp[-image.step1()]))
*top++ = dot::make(p.x, p.y - 1);
p = *--top;
}
}
}
cv::Mat image;
cv::Mat _labels;
dot* stack;
};
}
GPU_PERF_TEST(ConnectedComponents, cv::gpu::DeviceInfo, cv::Size)
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
cv::Mat image = readImage("gpu/labeling/aloe-disp.png", cv::IMREAD_GRAYSCALE);
GreedyLabeling host(image);
host(host._labels);
declare.time(1.0);
TEST_CYCLE()
{
host(host._labels);
}
}
INSTANTIATE_TEST_CASE_P(Labeling, ConnectedComponents, testing::Combine(ALL_DEVICES, testing::Values(cv::Size(261, 262))));
#endif

View File

@ -1,20 +0,0 @@
#include "perf_cpu_precomp.hpp"
#ifdef HAVE_CUDA
int main(int argc, char **argv)
{
testing::InitGoogleTest(&argc, argv);
perf::TestBase::Init(argc, argv);
return RUN_ALL_TESTS();
}
#else
int main()
{
printf("OpenCV was built without CUDA support\n");
return 0;
}
#endif

View File

@ -1,124 +0,0 @@
#include "perf_cpu_precomp.hpp"
#ifdef HAVE_CUDA
//////////////////////////////////////////////////////////////////////
// SetTo
GPU_PERF_TEST(SetTo, cv::gpu::DeviceInfo, cv::Size, MatType)
{
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
cv::Mat src(size, type);
cv::Scalar val(1, 2, 3, 4);
src.setTo(val);
TEST_CYCLE()
{
src.setTo(val);
}
}
INSTANTIATE_TEST_CASE_P(MatOp, SetTo, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4),
MatType(CV_64FC1), MatType(CV_64FC3), MatType(CV_64FC4))));
//////////////////////////////////////////////////////////////////////
// SetToMasked
GPU_PERF_TEST(SetToMasked, cv::gpu::DeviceInfo, cv::Size, MatType)
{
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
cv::Mat src(size, type);
fill(src, 0, 255);
cv::Mat mask(size, CV_8UC1);
fill(mask, 0, 2);
cv::Scalar val(1, 2, 3, 4);
src.setTo(val, mask);
TEST_CYCLE()
{
src.setTo(val, mask);
}
}
INSTANTIATE_TEST_CASE_P(MatOp, SetToMasked, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4),
MatType(CV_64FC1), MatType(CV_64FC3), MatType(CV_64FC4))));
//////////////////////////////////////////////////////////////////////
// CopyToMasked
GPU_PERF_TEST(CopyToMasked, cv::gpu::DeviceInfo, cv::Size, MatType)
{
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
cv::Mat src(size, type);
fill(src, 0, 255);
cv::Mat mask(size, CV_8UC1);
fill(mask, 0, 2);
cv::Mat dst;
src.copyTo(dst, mask);
TEST_CYCLE()
{
src.copyTo(dst, mask);
}
}
INSTANTIATE_TEST_CASE_P(MatOp, CopyToMasked, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4),
MatType(CV_64FC1), MatType(CV_64FC3), MatType(CV_64FC4))));
//////////////////////////////////////////////////////////////////////
// ConvertTo
GPU_PERF_TEST(ConvertTo, cv::gpu::DeviceInfo, cv::Size, MatDepth, MatDepth)
{
cv::Size size = GET_PARAM(1);
int depth1 = GET_PARAM(2);
int depth2 = GET_PARAM(3);
cv::Mat src(size, depth1);
fill(src, 0, 255);
cv::Mat dst;
src.convertTo(dst, depth2, 0.5, 1.0);
TEST_CYCLE()
{
src.convertTo(dst, depth2, 0.5, 1.0);
}
}
INSTANTIATE_TEST_CASE_P(MatOp, ConvertTo, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F), MatDepth(CV_64F)),
testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F), MatDepth(CV_64F))));
#endif

View File

@ -1,74 +0,0 @@
#include "perf_cpu_precomp.hpp"
#ifdef HAVE_CUDA
///////////////////////////////////////////////////////////////
// HOG
GPU_PERF_TEST_1(HOG, cv::gpu::DeviceInfo)
{
cv::Mat img = readImage("gpu/hog/road.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
std::vector<cv::Rect> found_locations;
cv::HOGDescriptor hog;
hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
hog.detectMultiScale(img, found_locations);
TEST_CYCLE()
{
hog.detectMultiScale(img, found_locations);
}
}
INSTANTIATE_TEST_CASE_P(ObjDetect, HOG, ALL_DEVICES);
///////////////////////////////////////////////////////////////
// HaarClassifier
GPU_PERF_TEST_1(HaarClassifier, cv::gpu::DeviceInfo)
{
cv::Mat img = readImage("gpu/haarcascade/group_1_640x480_VGA.pgm", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
cv::CascadeClassifier cascade;
ASSERT_TRUE(cascade.load(perf::TestBase::getDataPath("gpu/perf/haarcascade_frontalface_alt.xml")));
std::vector<cv::Rect> rects;
cascade.detectMultiScale(img, rects);
TEST_CYCLE()
{
cascade.detectMultiScale(img, rects);
}
}
INSTANTIATE_TEST_CASE_P(ObjDetect, HaarClassifier, ALL_DEVICES);
//===================== LBP cascade ==========================//
GPU_PERF_TEST_1(LBPClassifier, cv::gpu::DeviceInfo)
{
cv::Mat img = readImage("gpu/haarcascade/group_1_640x480_VGA.pgm", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
cv::CascadeClassifier cascade;
ASSERT_TRUE(cascade.load(perf::TestBase::getDataPath("gpu/lbpcascade/lbpcascade_frontalface.xml")));
std::vector<cv::Rect> rects;
cascade.detectMultiScale(img, rects);
TEST_CYCLE()
{
cascade.detectMultiScale(img, rects);
}
}
INSTANTIATE_TEST_CASE_P(ObjDetect, LBPClassifier, ALL_DEVICES);
#endif

View File

@ -1,220 +0,0 @@
#include "perf_cpu_precomp.hpp"
using namespace std;
using namespace cv;
using namespace cv::gpu;
void fill(Mat& m, double a, double b)
{
RNG rng(123456789);
rng.fill(m, RNG::UNIFORM, a, b);
}
void PrintTo(const CvtColorInfo& info, ostream* os)
{
static const char* str[] =
{
"BGR2BGRA",
"BGRA2BGR",
"BGR2RGBA",
"RGBA2BGR",
"BGR2RGB",
"BGRA2RGBA",
"BGR2GRAY",
"RGB2GRAY",
"GRAY2BGR",
"GRAY2BGRA",
"BGRA2GRAY",
"RGBA2GRAY",
"BGR2BGR565",
"RGB2BGR565",
"BGR5652BGR",
"BGR5652RGB",
"BGRA2BGR565",
"RGBA2BGR565",
"BGR5652BGRA",
"BGR5652RGBA",
"GRAY2BGR565",
"BGR5652GRAY",
"BGR2BGR555",
"RGB2BGR555",
"BGR5552BGR",
"BGR5552RGB",
"BGRA2BGR555",
"RGBA2BGR555",
"BGR5552BGRA",
"BGR5552RGBA",
"GRAY2BGR555",
"BGR5552GRAY",
"BGR2XYZ",
"RGB2XYZ",
"XYZ2BGR",
"XYZ2RGB",
"BGR2YCrCb",
"RGB2YCrCb",
"YCrCb2BGR",
"YCrCb2RGB",
"BGR2HSV",
"RGB2HSV",
"",
"",
"BGR2Lab",
"RGB2Lab",
"BayerBG2BGR",
"BayerGB2BGR",
"BayerRG2BGR",
"BayerGR2BGR",
"BGR2Luv",
"RGB2Luv",
"BGR2HLS",
"RGB2HLS",
"HSV2BGR",
"HSV2RGB",
"Lab2BGR",
"Lab2RGB",
"Luv2BGR",
"Luv2RGB",
"HLS2BGR",
"HLS2RGB",
"BayerBG2BGR_VNG",
"BayerGB2BGR_VNG",
"BayerRG2BGR_VNG",
"BayerGR2BGR_VNG",
"BGR2HSV_FULL",
"RGB2HSV_FULL",
"BGR2HLS_FULL",
"RGB2HLS_FULL",
"HSV2BGR_FULL",
"HSV2RGB_FULL",
"HLS2BGR_FULL",
"HLS2RGB_FULL",
"LBGR2Lab",
"LRGB2Lab",
"LBGR2Luv",
"LRGB2Luv",
"Lab2LBGR",
"Lab2LRGB",
"Luv2LBGR",
"Luv2LRGB",
"BGR2YUV",
"RGB2YUV",
"YUV2BGR",
"YUV2RGB",
"BayerBG2GRAY",
"BayerGB2GRAY",
"BayerRG2GRAY",
"BayerGR2GRAY",
//YUV 4:2:0 formats family
"YUV2RGB_NV12",
"YUV2BGR_NV12",
"YUV2RGB_NV21",
"YUV2BGR_NV21",
"YUV2RGBA_NV12",
"YUV2BGRA_NV12",
"YUV2RGBA_NV21",
"YUV2BGRA_NV21",
"YUV2RGB_YV12",
"YUV2BGR_YV12",
"YUV2RGB_IYUV",
"YUV2BGR_IYUV",
"YUV2RGBA_YV12",
"YUV2BGRA_YV12",
"YUV2RGBA_IYUV",
"YUV2BGRA_IYUV",
"YUV2GRAY_420",
//YUV 4:2:2 formats family
"YUV2RGB_UYVY",
"YUV2BGR_UYVY",
"YUV2RGB_VYUY",
"YUV2BGR_VYUY",
"YUV2RGBA_UYVY",
"YUV2BGRA_UYVY",
"YUV2RGBA_VYUY",
"YUV2BGRA_VYUY",
"YUV2RGB_YUY2",
"YUV2BGR_YUY2",
"YUV2RGB_YVYU",
"YUV2BGR_YVYU",
"YUV2RGBA_YUY2",
"YUV2BGRA_YUY2",
"YUV2RGBA_YVYU",
"YUV2BGRA_YVYU",
"YUV2GRAY_UYVY",
"YUV2GRAY_YUY2",
// alpha premultiplication
"RGBA2mRGBA",
"mRGBA2RGBA",
"COLORCVT_MAX"
};
*os << str[info.code];
}
void cv::gpu::PrintTo(const DeviceInfo& info, ostream* os)
{
*os << info.name();
}
Mat readImage(const string& fileName, int flags)
{
return imread(perf::TestBase::getDataPath(fileName), flags);
}
const vector<DeviceInfo>& devices()
{
static vector<DeviceInfo> devs;
static bool first = true;
if (first)
{
int deviceCount = getCudaEnabledDeviceCount();
devs.reserve(deviceCount);
for (int i = 0; i < deviceCount; ++i)
{
DeviceInfo info(i);
if (info.isCompatible())
devs.push_back(info);
}
first = false;
}
return devs;
}

View File

@ -1,77 +0,0 @@
#ifndef __OPENCV_PERF_GPU_UTILITY_HPP__
#define __OPENCV_PERF_GPU_UTILITY_HPP__
void fill(cv::Mat& m, double a, double b);
using perf::MatType;
using perf::MatDepth;
CV_ENUM(BorderMode, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT, cv::BORDER_REFLECT, cv::BORDER_WRAP)
CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::INTER_AREA)
CV_ENUM(NormType, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_HAMMING)
struct CvtColorInfo
{
int scn;
int dcn;
int code;
explicit CvtColorInfo(int scn_=0, int dcn_=0, int code_=0) : scn(scn_), dcn(dcn_), code(code_) {}
};
void PrintTo(const CvtColorInfo& info, std::ostream* os);
#define IMPLEMENT_PARAM_CLASS(name, type) \
class name \
{ \
public: \
name ( type arg = type ()) : val_(arg) {} \
operator type () const {return val_;} \
private: \
type val_; \
}; \
inline void PrintTo( name param, std::ostream* os) \
{ \
*os << #name << " = " << testing::PrintToString(static_cast< type >(param)); \
}
IMPLEMENT_PARAM_CLASS(Channels, int)
namespace cv { namespace gpu
{
void PrintTo(const cv::gpu::DeviceInfo& info, std::ostream* os);
}}
#define GPU_PERF_TEST(name, ...) \
struct name : perf::TestBaseWithParam< std::tr1::tuple< __VA_ARGS__ > > \
{ \
public: \
name() {} \
protected: \
void PerfTestBody(); \
}; \
TEST_P(name, perf){ RunPerfTestBody(); } \
void name :: PerfTestBody()
#define GPU_PERF_TEST_1(name, param_type) \
struct name : perf::TestBaseWithParam< param_type > \
{ \
public: \
name() {} \
protected: \
void PerfTestBody(); \
}; \
TEST_P(name, perf){ RunPerfTestBody(); } \
void name :: PerfTestBody()
#define GPU_TYPICAL_MAT_SIZES testing::Values(perf::szSXGA, perf::sz1080p, cv::Size(1800, 1500))
cv::Mat readImage(const std::string& fileName, int flags = cv::IMREAD_COLOR);
const std::vector<cv::gpu::DeviceInfo>& devices();
#define ALL_DEVICES testing::ValuesIn(devices())
#define GET_PARAM(k) std::tr1::get< k >(GetParam())
#endif // __OPENCV_PERF_GPU_UTILITY_HPP__

View File

@ -1,466 +0,0 @@
#include "perf_cpu_precomp.hpp"
#ifdef HAVE_CUDA
//////////////////////////////////////////////////////
// GoodFeaturesToTrack
IMPLEMENT_PARAM_CLASS(MinDistance, double)
GPU_PERF_TEST(GoodFeaturesToTrack, cv::gpu::DeviceInfo, MinDistance)
{
double minDistance = GET_PARAM(1);
cv::Mat image = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(image.empty());
cv::Mat corners;
cv::goodFeaturesToTrack(image, corners, 8000, 0.01, minDistance);
TEST_CYCLE()
{
cv::goodFeaturesToTrack(image, corners, 8000, 0.01, minDistance);
}
}
INSTANTIATE_TEST_CASE_P(Video, GoodFeaturesToTrack, testing::Combine(
ALL_DEVICES,
testing::Values(MinDistance(0.0), MinDistance(3.0))));
//////////////////////////////////////////////////////
// PyrLKOpticalFlowSparse
IMPLEMENT_PARAM_CLASS(GraySource, bool)
IMPLEMENT_PARAM_CLASS(Points, int)
IMPLEMENT_PARAM_CLASS(WinSize, int)
IMPLEMENT_PARAM_CLASS(Levels, int)
IMPLEMENT_PARAM_CLASS(Iters, int)
GPU_PERF_TEST(PyrLKOpticalFlowSparse, cv::gpu::DeviceInfo, GraySource, Points, WinSize, Levels, Iters)
{
bool useGray = GET_PARAM(1);
int points = GET_PARAM(2);
int win_size = GET_PARAM(3);
int levels = GET_PARAM(4);
int iters = GET_PARAM(5);
cv::Mat frame0 = readImage("gpu/opticalflow/frame0.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
ASSERT_FALSE(frame0.empty());
cv::Mat frame1 = readImage("gpu/opticalflow/frame1.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
ASSERT_FALSE(frame1.empty());
cv::Mat gray_frame;
if (useGray)
gray_frame = frame0;
else
cv::cvtColor(frame0, gray_frame, cv::COLOR_BGR2GRAY);
cv::Mat pts;
cv::goodFeaturesToTrack(gray_frame, pts, points, 0.01, 0.0);
cv::Mat nextPts;
cv::Mat status;
cv::calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, cv::noArray(),
cv::Size(win_size, win_size), levels - 1,
cv::TermCriteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, iters, 0.01));
declare.time(20.0);
TEST_CYCLE()
{
cv::calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, cv::noArray(),
cv::Size(win_size, win_size), levels - 1,
cv::TermCriteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, iters, 0.01));
}
}
INSTANTIATE_TEST_CASE_P(Video, PyrLKOpticalFlowSparse, testing::Combine(
ALL_DEVICES,
testing::Values(GraySource(true), GraySource(false)),
testing::Values(Points(1000), Points(2000), Points(4000), Points(8000)),
testing::Values(WinSize(9), WinSize(13), WinSize(17), WinSize(21)),
testing::Values(Levels(1), Levels(2), Levels(3)),
testing::Values(Iters(1), Iters(10), Iters(30))));
//////////////////////////////////////////////////////
// FarnebackOpticalFlowTest
GPU_PERF_TEST_1(FarnebackOpticalFlowTest, cv::gpu::DeviceInfo)
{
cv::Mat frame0 = readImage("gpu/opticalflow/frame0.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame0.empty());
cv::Mat frame1 = readImage("gpu/opticalflow/frame1.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame1.empty());
cv::Mat flow;
int numLevels = 5;
double pyrScale = 0.5;
int winSize = 13;
int numIters = 10;
int polyN = 5;
double polySigma = 1.1;
int flags = 0;
cv::calcOpticalFlowFarneback(frame0, frame1, flow, pyrScale, numLevels, winSize, numIters, polyN, polySigma, flags);
declare.time(10);
TEST_CYCLE()
{
cv::calcOpticalFlowFarneback(frame0, frame1, flow, pyrScale, numLevels, winSize, numIters, polyN, polySigma, flags);
}
}
INSTANTIATE_TEST_CASE_P(Video, FarnebackOpticalFlowTest, ALL_DEVICES);
//////////////////////////////////////////////////////
// FGDStatModel
namespace cv
{
template<> void Ptr<CvBGStatModel>::delete_obj()
{
cvReleaseBGStatModel(&obj);
}
}
GPU_PERF_TEST(FGDStatModel, cv::gpu::DeviceInfo, std::string)
{
std::string inputFile = perf::TestBase::getDataPath(std::string("gpu/video/") + GET_PARAM(1));
cv::VideoCapture cap(inputFile);
ASSERT_TRUE(cap.isOpened());
cv::Mat frame;
cap >> frame;
ASSERT_FALSE(frame.empty());
IplImage ipl_frame = frame;
cv::Ptr<CvBGStatModel> model(cvCreateFGDStatModel(&ipl_frame));
declare.time(60);
for (int i = 0; i < 10; ++i)
{
cap >> frame;
ASSERT_FALSE(frame.empty());
ipl_frame = frame;
startTimer();
next();
cvUpdateBGStatModel(&ipl_frame, model);
stopTimer();
}
}
INSTANTIATE_TEST_CASE_P(Video, FGDStatModel, testing::Combine(
ALL_DEVICES,
testing::Values(std::string("768x576.avi"), std::string("1920x1080.avi"))));
//////////////////////////////////////////////////////
// MOG
IMPLEMENT_PARAM_CLASS(LearningRate, double)
GPU_PERF_TEST(MOG, cv::gpu::DeviceInfo, std::string, Channels, LearningRate)
{
std::string inputFile = perf::TestBase::getDataPath(std::string("gpu/video/") + GET_PARAM(1));
int cn = GET_PARAM(2);
double learningRate = GET_PARAM(3);
cv::VideoCapture cap(inputFile);
ASSERT_TRUE(cap.isOpened());
cv::Mat frame;
cv::BackgroundSubtractorMOG mog;
cv::Mat foreground;
cap >> frame;
ASSERT_FALSE(frame.empty());
if (cn != 3)
{
cv::Mat temp;
if (cn == 1)
cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
else
cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
cv::swap(temp, frame);
}
mog(frame, foreground, learningRate);
for (int i = 0; i < 10; ++i)
{
cap >> frame;
ASSERT_FALSE(frame.empty());
if (cn != 3)
{
cv::Mat temp;
if (cn == 1)
cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
else
cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
cv::swap(temp, frame);
}
startTimer(); next();
mog(frame, foreground, learningRate);
stopTimer();
}
}
INSTANTIATE_TEST_CASE_P(Video, MOG, testing::Combine(
ALL_DEVICES,
testing::Values(std::string("768x576.avi"), std::string("1920x1080.avi")),
testing::Values(Channels(1), Channels(3)/*, Channels(4)*/),
testing::Values(LearningRate(0.0), LearningRate(0.01))));
//////////////////////////////////////////////////////
// MOG2
GPU_PERF_TEST(MOG2_update, cv::gpu::DeviceInfo, std::string, Channels)
{
std::string inputFile = perf::TestBase::getDataPath(std::string("gpu/video/") + GET_PARAM(1));
int cn = GET_PARAM(2);
cv::VideoCapture cap(inputFile);
ASSERT_TRUE(cap.isOpened());
cv::Mat frame;
cv::BackgroundSubtractorMOG2 mog2;
cv::Mat foreground;
cap >> frame;
ASSERT_FALSE(frame.empty());
if (cn != 3)
{
cv::Mat temp;
if (cn == 1)
cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
else
cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
cv::swap(temp, frame);
}
mog2(frame, foreground);
for (int i = 0; i < 10; ++i)
{
cap >> frame;
ASSERT_FALSE(frame.empty());
if (cn != 3)
{
cv::Mat temp;
if (cn == 1)
cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
else
cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
cv::swap(temp, frame);
}
startTimer(); next();
mog2(frame, foreground);
stopTimer();
}
}
INSTANTIATE_TEST_CASE_P(Video, MOG2_update, testing::Combine(
ALL_DEVICES,
testing::Values(std::string("768x576.avi"), std::string("1920x1080.avi")),
testing::Values(Channels(1), Channels(3)/*, Channels(4)*/)));
GPU_PERF_TEST(MOG2_getBackgroundImage, cv::gpu::DeviceInfo, std::string, Channels)
{
std::string inputFile = perf::TestBase::getDataPath(std::string("gpu/video/") + GET_PARAM(1));
int cn = GET_PARAM(2);
cv::VideoCapture cap(inputFile);
ASSERT_TRUE(cap.isOpened());
cv::Mat frame;
cv::BackgroundSubtractorMOG2 mog2;
cv::Mat foreground;
for (int i = 0; i < 10; ++i)
{
cap >> frame;
ASSERT_FALSE(frame.empty());
if (cn != 3)
{
cv::Mat temp;
if (cn == 1)
cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
else
cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
cv::swap(temp, frame);
}
mog2(frame, foreground);
}
cv::Mat background;
mog2.getBackgroundImage(background);
TEST_CYCLE()
{
mog2.getBackgroundImage(background);
}
}
INSTANTIATE_TEST_CASE_P(Video, MOG2_getBackgroundImage, testing::Combine(
ALL_DEVICES,
testing::Values(std::string("768x576.avi"), std::string("1920x1080.avi")),
testing::Values(/*Channels(1),*/ Channels(3)/*, Channels(4)*/)));
//////////////////////////////////////////////////////
// GMG
IMPLEMENT_PARAM_CLASS(MaxFeatures, int)
GPU_PERF_TEST(GMG, cv::gpu::DeviceInfo, std::string, Channels, MaxFeatures)
{
std::string inputFile = perf::TestBase::getDataPath(std::string("gpu/video/") + GET_PARAM(1));
int cn = GET_PARAM(2);
int maxFeatures = GET_PARAM(3);
cv::VideoCapture cap(inputFile);
ASSERT_TRUE(cap.isOpened());
cv::Mat frame;
cap >> frame;
ASSERT_FALSE(frame.empty());
if (cn != 3)
{
cv::Mat temp;
if (cn == 1)
cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
else
cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
cv::swap(temp, frame);
}
cv::Mat fgmask;
cv::Mat zeros(frame.size(), CV_8UC1, cv::Scalar::all(0));
cv::BackgroundSubtractorGMG gmg;
gmg.set("maxFeatures", maxFeatures);
gmg.initialize(frame.size(), 0.0, 255.0);
gmg(frame, fgmask);
for (int i = 0; i < 150; ++i)
{
cap >> frame;
if (frame.empty())
{
cap.open(inputFile);
cap >> frame;
}
if (cn != 3)
{
cv::Mat temp;
if (cn == 1)
cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
else
cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
cv::swap(temp, frame);
}
startTimer(); next();
gmg(frame, fgmask);
stopTimer();
}
}
INSTANTIATE_TEST_CASE_P(Video, GMG, testing::Combine(
ALL_DEVICES,
testing::Values(std::string("768x576.avi"), std::string("1920x1080.avi")),
testing::Values(Channels(1), Channels(3), Channels(4)),
testing::Values(MaxFeatures(20), MaxFeatures(40), MaxFeatures(60))));
//////////////////////////////////////////////////////
// VideoWriter
#ifdef WIN32
GPU_PERF_TEST(VideoWriter, cv::gpu::DeviceInfo, std::string)
{
const double FPS = 25.0;
std::string inputFile = perf::TestBase::getDataPath(std::string("gpu/video/") + GET_PARAM(1));
std::string outputFile = cv::tempfile(".avi");
cv::VideoCapture reader(inputFile);
ASSERT_TRUE( reader.isOpened() );
cv::VideoWriter writer;
cv::Mat frame;
declare.time(30);
for (int i = 0; i < 10; ++i)
{
reader >> frame;
ASSERT_FALSE(frame.empty());
if (!writer.isOpened())
writer.open(outputFile, CV_FOURCC('X', 'V', 'I', 'D'), FPS, frame.size());
startTimer(); next();
writer.write(frame);
stopTimer();
}
}
INSTANTIATE_TEST_CASE_P(Video, VideoWriter, testing::Combine(
ALL_DEVICES,
testing::Values(std::string("768x576.avi"), std::string("1920x1080.avi"))));
#endif // WIN32
//////////////////////////////////////////////////////
// VideoReader
GPU_PERF_TEST(VideoReader, cv::gpu::DeviceInfo, std::string)
{
std::string inputFile = perf::TestBase::getDataPath(std::string("gpu/video/") + GET_PARAM(1));
cv::VideoCapture reader(inputFile);
ASSERT_TRUE( reader.isOpened() );
cv::Mat frame;
reader >> frame;
declare.time(20);
TEST_CYCLE_N(10)
{
reader >> frame;
}
}
INSTANTIATE_TEST_CASE_P(Video, VideoReader, testing::Combine(
ALL_DEVICES,
testing::Values(std::string("768x576.avi"), std::string("1920x1080.avi"))));
#endif

View File

@ -420,16 +420,16 @@ void cv::gpu::BFMatcher_GPU::matchConvert(const Mat& trainIdx, const Mat& imgIdx
const float* distance_ptr = distance.ptr<float>();
for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr)
{
int trainIdx = *trainIdx_ptr;
int _trainIdx = *trainIdx_ptr;
if (trainIdx == -1)
if (_trainIdx == -1)
continue;
int imgIdx = *imgIdx_ptr;
int _imgIdx = *imgIdx_ptr;
float distance = *distance_ptr;
float _distance = *distance_ptr;
DMatch m(queryIdx, trainIdx, imgIdx, distance);
DMatch m(queryIdx, _trainIdx, _imgIdx, _distance);
matches.push_back(m);
}
@ -558,13 +558,13 @@ void cv::gpu::BFMatcher_GPU::knnMatchConvert(const Mat& trainIdx, const Mat& dis
for (int i = 0; i < k; ++i, ++trainIdx_ptr, ++distance_ptr)
{
int trainIdx = *trainIdx_ptr;
int _trainIdx = *trainIdx_ptr;
if (trainIdx != -1)
if (_trainIdx != -1)
{
float distance = *distance_ptr;
float _distance = *distance_ptr;
DMatch m(queryIdx, trainIdx, 0, distance);
DMatch m(queryIdx, _trainIdx, 0, _distance);
curMatches.push_back(m);
}
@ -680,15 +680,15 @@ void cv::gpu::BFMatcher_GPU::knnMatch2Convert(const Mat& trainIdx, const Mat& im
for (int i = 0; i < 2; ++i, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr)
{
int trainIdx = *trainIdx_ptr;
int _trainIdx = *trainIdx_ptr;
if (trainIdx != -1)
if (_trainIdx != -1)
{
int imgIdx = *imgIdx_ptr;
int _imgIdx = *imgIdx_ptr;
float distance = *distance_ptr;
float _distance = *distance_ptr;
DMatch m(queryIdx, trainIdx, imgIdx, distance);
DMatch m(queryIdx, _trainIdx, _imgIdx, _distance);
curMatches.push_back(m);
}
@ -868,25 +868,25 @@ void cv::gpu::BFMatcher_GPU::radiusMatchConvert(const Mat& trainIdx, const Mat&
const int* trainIdx_ptr = trainIdx.ptr<int>(queryIdx);
const float* distance_ptr = distance.ptr<float>(queryIdx);
const int nMatches = std::min(nMatches_ptr[queryIdx], trainIdx.cols);
const int nMatched = std::min(nMatches_ptr[queryIdx], trainIdx.cols);
if (nMatches == 0)
if (nMatched == 0)
{
if (!compactResult)
matches.push_back(vector<DMatch>());
continue;
}
matches.push_back(vector<DMatch>(nMatches));
matches.push_back(vector<DMatch>(nMatched));
vector<DMatch>& curMatches = matches.back();
for (int i = 0; i < nMatches; ++i, ++trainIdx_ptr, ++distance_ptr)
for (int i = 0; i < nMatched; ++i, ++trainIdx_ptr, ++distance_ptr)
{
int trainIdx = *trainIdx_ptr;
int _trainIdx = *trainIdx_ptr;
float distance = *distance_ptr;
float _distance = *distance_ptr;
DMatch m(queryIdx, trainIdx, 0, distance);
DMatch m(queryIdx, _trainIdx, 0, _distance);
curMatches[i] = m;
}
@ -1009,9 +1009,9 @@ void cv::gpu::BFMatcher_GPU::radiusMatchConvert(const Mat& trainIdx, const Mat&
const int* imgIdx_ptr = imgIdx.ptr<int>(queryIdx);
const float* distance_ptr = distance.ptr<float>(queryIdx);
const int nMatches = std::min(nMatches_ptr[queryIdx], trainIdx.cols);
const int nMatched = std::min(nMatches_ptr[queryIdx], trainIdx.cols);
if (nMatches == 0)
if (nMatched == 0)
{
if (!compactResult)
matches.push_back(vector<DMatch>());
@ -1020,9 +1020,9 @@ void cv::gpu::BFMatcher_GPU::radiusMatchConvert(const Mat& trainIdx, const Mat&
matches.push_back(vector<DMatch>());
vector<DMatch>& curMatches = matches.back();
curMatches.reserve(nMatches);
curMatches.reserve(nMatched);
for (int i = 0; i < nMatches; ++i, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr)
for (int i = 0; i < nMatched; ++i, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr)
{
int _trainIdx = *trainIdx_ptr;
int _imgIdx = *imgIdx_ptr;

View File

@ -214,6 +214,7 @@ void cv::gpu::solvePnPRansac(const Mat& object, const Mat& image, const Mat& cam
int num_iters, float max_dist, int min_inlier_count,
vector<int>* inliers)
{
(void)min_inlier_count;
CV_Assert(object.rows == 1 && object.cols > 0 && object.type() == CV_32FC3);
CV_Assert(image.rows == 1 && image.cols > 0 && image.type() == CV_32FC2);
CV_Assert(object.cols == image.cols);

View File

@ -143,7 +143,7 @@ public:
}
unsigned int process(const GpuMat& image, GpuMat& objectsBuf, float scaleFactor, int minNeighbors,
bool findLargestObject, bool visualizeInPlace, cv::Size minSize, cv::Size maxObjectSize)
bool findLargestObject, bool visualizeInPlace, cv::Size minSize, cv::Size /*maxObjectSize*/)
{
CV_Assert( scaleFactor > 1 && image.depth() == CV_8U);
@ -380,12 +380,12 @@ public:
LbpCascade(){}
virtual ~LbpCascade(){}
virtual unsigned int process(const GpuMat& image, GpuMat& objects, float scaleFactor, int groupThreshold, bool findLargestObject,
bool visualizeInPlace, cv::Size minObjectSize, cv::Size maxObjectSize)
virtual unsigned int process(const GpuMat& image, GpuMat& objects, float scaleFactor, int groupThreshold, bool /*findLargestObject*/,
bool /*visualizeInPlace*/, cv::Size minObjectSize, cv::Size maxObjectSize)
{
CV_Assert(scaleFactor > 1 && image.depth() == CV_8U);
const int defaultObjSearchNum = 100;
// const int defaultObjSearchNum = 100;
const float grouping_eps = 0.2f;
if( !objects.empty() && objects.depth() == CV_32S)

View File

@ -56,7 +56,7 @@ namespace cv { namespace gpu { namespace device
__global__ void matchUnrolled(const DevMem2D_<T> query, int imgIdx, const DevMem2D_<T> train, float maxDistance, const Mask mask,
PtrStepi bestTrainIdx, PtrStepi bestImgIdx, PtrStepf bestDistance, unsigned int* nMatches, int maxCount)
{
#if __CUDA_ARCH__ >= 110
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
extern __shared__ int smem[];
@ -168,7 +168,7 @@ namespace cv { namespace gpu { namespace device
__global__ void match(const DevMem2D_<T> query, int imgIdx, const DevMem2D_<T> train, float maxDistance, const Mask mask,
PtrStepi bestTrainIdx, PtrStepi bestImgIdx, PtrStepf bestDistance, unsigned int* nMatches, int maxCount)
{
#if __CUDA_ARCH__ >= 110
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
extern __shared__ int smem[];

View File

@ -261,7 +261,7 @@ namespace cv { namespace gpu { namespace device
__global__ void edgesHysteresisLocal(PtrStepi map, ushort2* st, int rows, int cols)
{
#if __CUDA_ARCH__ >= 120
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 120)
__shared__ int smem[18][18];
@ -358,7 +358,7 @@ namespace cv { namespace gpu { namespace device
__global__ void edgesHysteresisGlobal(PtrStepi map, ushort2* st1, ushort2* st2, int rows, int cols, int count)
{
#if __CUDA_ARCH__ >= 120
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 120
const int stack_size = 512;

View File

@ -316,7 +316,7 @@ namespace cv { namespace gpu { namespace device
}
}
changed = Emulation::sycthOr(changed);
changed = Emulation::syncthreadsOr(changed);
if (!changed)
break;
@ -474,7 +474,7 @@ namespace cv { namespace gpu { namespace device
}
}
}
} while (Emulation::sycthOr(changed));
} while (Emulation::syncthreadsOr(changed));
}
__global__ void flatten(const DevMem2D edges, DevMem2Di comps)

View File

@ -64,7 +64,7 @@ namespace cv { namespace gpu { namespace device
template <int KSIZE, typename T, typename D, typename B>
__global__ void linearColumnFilter(const DevMem2D_<T> src, PtrStep<D> dst, const int anchor, const B brd)
{
#if __CUDA_ARCH__ >= 200
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
const int BLOCK_DIM_X = 16;
const int BLOCK_DIM_Y = 16;
const int PATCH_PER_BLOCK = 4;

View File

@ -223,7 +223,7 @@ namespace cv { namespace gpu { namespace device
template <bool calcScore, class Mask>
__global__ void calcKeypoints(const DevMem2Db img, const Mask mask, short2* kpLoc, const unsigned int maxKeypoints, PtrStepi score, const int threshold)
{
#if __CUDA_ARCH__ >= 110
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
const int j = threadIdx.x + blockIdx.x * blockDim.x + 3;
const int i = threadIdx.y + blockIdx.y * blockDim.y + 3;
@ -325,7 +325,7 @@ namespace cv { namespace gpu { namespace device
__global__ void nonmaxSupression(const short2* kpLoc, int count, const DevMem2Di scoreMat, short2* locFinal, float* responseFinal)
{
#if __CUDA_ARCH__ >= 110
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
const int kpIdx = threadIdx.x + blockIdx.x * blockDim.x;

View File

@ -63,7 +63,7 @@ namespace cv { namespace gpu { namespace device
#define MERGE_THREADBLOCK_SIZE 256
#define USE_SMEM_ATOMICS (__CUDA_ARCH__ >= 120)
#define USE_SMEM_ATOMICS (defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 120))
namespace hist
{

View File

@ -59,7 +59,7 @@ namespace cv { namespace gpu { namespace device
{
__shared__ int s_queues[4][32 * PIXELS_PER_THREAD];
__shared__ int s_qsize[4];
__shared__ int s_start[4];
__shared__ int s_globStart[4];
const int x = blockIdx.x * blockDim.x * PIXELS_PER_THREAD + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
@ -73,9 +73,10 @@ namespace cv { namespace gpu { namespace device
__syncthreads();
// fill the queue
const uchar* srcRow = src.ptr(y);
for (int i = 0, xx = x; i < PIXELS_PER_THREAD && xx < src.cols; ++i, xx += blockDim.x)
{
if (src(y, xx))
if (srcRow[xx])
{
const unsigned int val = (y << 16) | xx;
const int qidx = Emulation::smem::atomicAdd(&s_qsize[threadIdx.y], 1);
@ -89,36 +90,34 @@ namespace cv { namespace gpu { namespace device
if (threadIdx.x == 0 && threadIdx.y == 0)
{
// find how many items are stored in each list
int total_size = 0;
int totalSize = 0;
for (int i = 0; i < blockDim.y; ++i)
{
s_start[i] = total_size;
total_size += s_qsize[i];
s_globStart[i] = totalSize;
totalSize += s_qsize[i];
}
// calculate the offset in the global list
const int global_offset = atomicAdd(&g_counter, total_size);
const int globalOffset = atomicAdd(&g_counter, totalSize);
for (int i = 0; i < blockDim.y; ++i)
s_start[i] += global_offset;
s_globStart[i] += globalOffset;
}
__syncthreads();
// copy local queues to global queue
const int qsize = s_qsize[threadIdx.y];
for(int i = threadIdx.x; i < qsize; i += blockDim.x)
{
const unsigned int val = s_queues[threadIdx.y][i];
list[s_start[threadIdx.y] + i] = val;
}
int gidx = s_globStart[threadIdx.y] + threadIdx.x;
for(int i = threadIdx.x; i < qsize; i += blockDim.x, gidx += blockDim.x)
list[gidx] = s_queues[threadIdx.y][i];
}
int buildPointList_gpu(DevMem2Db src, unsigned int* list)
{
void* counter_ptr;
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
void* counterPtr;
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) );
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
const dim3 block(32, 4);
const dim3 grid(divUp(src.cols, block.x * PIXELS_PER_THREAD), divUp(src.rows, block.y));
@ -130,10 +129,10 @@ namespace cv { namespace gpu { namespace device
cudaSafeCall( cudaDeviceSynchronize() );
int total_count;
cudaSafeCall( cudaMemcpy(&total_count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );
int totalCount;
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
return total_count;
return totalCount;
}
////////////////////////////////////////////////////////////////////////
@ -144,24 +143,26 @@ namespace cv { namespace gpu { namespace device
const int n = blockIdx.x;
const float ang = n * theta;
float sin_ang;
float cos_ang;
sincosf(ang, &sin_ang, &cos_ang);
float sinVal;
float cosVal;
sincosf(ang, &sinVal, &cosVal);
sinVal *= irho;
cosVal *= irho;
const float tabSin = sin_ang * irho;
const float tabCos = cos_ang * irho;
const int shift = (numrho - 1) / 2;
int* accumRow = accum.ptr(n + 1);
for (int i = threadIdx.x; i < count; i += blockDim.x)
{
const unsigned int qvalue = list[i];
const unsigned int val = list[i];
const int x = (qvalue & 0x0000FFFF);
const int y = (qvalue >> 16) & 0x0000FFFF;
const int x = (val & 0xFFFF);
const int y = (val >> 16) & 0xFFFF;
int r = __float2int_rn(x * tabCos + y * tabSin);
r += (numrho - 1) / 2;
int r = __float2int_rn(x * cosVal + y * sinVal);
r += shift;
::atomicAdd(accum.ptr(n + 1) + r + 1, 1);
::atomicAdd(accumRow + r + 1, 1);
}
}
@ -177,30 +178,32 @@ namespace cv { namespace gpu { namespace device
const int n = blockIdx.x;
const float ang = n * theta;
float sin_ang;
float cos_ang;
sincosf(ang, &sin_ang, &cos_ang);
float sinVal;
float cosVal;
sincosf(ang, &sinVal, &cosVal);
sinVal *= irho;
cosVal *= irho;
const float tabSin = sin_ang * irho;
const float tabCos = cos_ang * irho;
const int shift = (numrho - 1) / 2;
for (int i = threadIdx.x; i < count; i += blockDim.x)
{
const unsigned int qvalue = list[i];
const unsigned int val = list[i];
const int x = (qvalue & 0x0000FFFF);
const int y = (qvalue >> 16) & 0x0000FFFF;
const int x = (val & 0xFFFF);
const int y = (val >> 16) & 0xFFFF;
int r = __float2int_rn(x * tabCos + y * tabSin);
r += (numrho - 1) / 2;
int r = __float2int_rn(x * cosVal + y * sinVal);
r += shift;
Emulation::smem::atomicAdd(&smem[r + 1], 1);
}
__syncthreads();
for (int i = threadIdx.x; i < numrho; i += blockDim.x)
accum(n + 1, i) = smem[i];
int* accumRow = accum.ptr(n + 1);
for (int i = threadIdx.x; i < numrho + 1; i += blockDim.x)
accumRow[i] = smem[i];
}
void linesAccum_gpu(const unsigned int* list, int count, DevMem2Di accum, float rho, float theta, size_t sharedMemPerBlock, bool has20)
@ -225,21 +228,21 @@ namespace cv { namespace gpu { namespace device
////////////////////////////////////////////////////////////////////////
// linesGetResult
__global__ void linesGetResult(const DevMem2Di accum, float2* out, int* votes, const int maxSize, const float threshold, const float theta, const float rho, const int numrho)
__global__ void linesGetResult(const DevMem2Di accum, float2* out, int* votes, const int maxSize, const float rho, const float theta, const float threshold, const int numrho)
{
__shared__ int smem[8][32];
int r = blockIdx.x * (blockDim.x - 2) + threadIdx.x;
int n = blockIdx.y * (blockDim.y - 2) + threadIdx.y;
const int x = blockIdx.x * (blockDim.x - 2) + threadIdx.x;
const int y = blockIdx.y * (blockDim.y - 2) + threadIdx.y;
if (r >= accum.cols || n >= accum.rows)
if (x >= accum.cols || y >= accum.rows)
return;
smem[threadIdx.y][threadIdx.x] = accum(n, r);
smem[threadIdx.y][threadIdx.x] = accum(y, x);
__syncthreads();
r -= 1;
n -= 1;
const int r = x - 1;
const int n = y - 1;
if (threadIdx.x == 0 || threadIdx.x == blockDim.x - 1 || threadIdx.y == 0 || threadIdx.y == blockDim.y - 1 || r >= accum.cols - 2 || n >= accum.rows - 2)
return;
@ -264,32 +267,32 @@ namespace cv { namespace gpu { namespace device
int linesGetResult_gpu(DevMem2Di accum, float2* out, int* votes, int maxSize, float rho, float theta, float threshold, bool doSort)
{
void* counter_ptr;
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
void* counterPtr;
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) );
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
const dim3 block(32, 8);
const dim3 grid(divUp(accum.cols, block.x - 2), divUp(accum.rows, block.y - 2));
linesGetResult<<<grid, block>>>(accum, out, votes, maxSize, threshold, theta, rho, accum.cols - 2);
linesGetResult<<<grid, block>>>(accum, out, votes, maxSize, rho, theta, threshold, accum.cols - 2);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
int total_count;
cudaSafeCall( cudaMemcpy(&total_count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );
int totalCount;
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
total_count = ::min(total_count, maxSize);
totalCount = ::min(totalCount, maxSize);
if (doSort && total_count > 0)
if (doSort && totalCount > 0)
{
thrust::device_ptr<float2> out_ptr(out);
thrust::device_ptr<int> votes_ptr(votes);
thrust::sort_by_key(votes_ptr, votes_ptr + total_count, out_ptr, thrust::greater<int>());
thrust::device_ptr<float2> outPtr(out);
thrust::device_ptr<int> votesPtr(votes);
thrust::sort_by_key(votesPtr, votesPtr + totalCount, outPtr, thrust::greater<int>());
}
return total_count;
return totalCount;
}
}
}}}

View File

@ -0,0 +1,385 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "opencv2/gpu/device/common.hpp"
namespace cv { namespace gpu { namespace device
{
namespace imgproc
{
// Utility function to extract unsigned chars from an unsigned integer
__device__ uchar4 int_to_uchar4(unsigned int in)
{
uchar4 bytes;
bytes.x = (in & 0x000000ff) >> 0;
bytes.y = (in & 0x0000ff00) >> 8;
bytes.z = (in & 0x00ff0000) >> 16;
bytes.w = (in & 0xff000000) >> 24;
return bytes;
}
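// e.g. int_to_uchar4(0x04030201) returns (x, y, z, w) = (1, 2, 3, 4)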
__global__ void shfl_integral_horizontal(const PtrStep_<uint4> img, PtrStep_<uint4> integral)
{
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 300)
__shared__ int sums[128];
const int id = threadIdx.x;
const int lane_id = id % warpSize;
const int warp_id = id / warpSize;
const uint4 data = img(blockIdx.x, id);
const uchar4 a = int_to_uchar4(data.x);
const uchar4 b = int_to_uchar4(data.y);
const uchar4 c = int_to_uchar4(data.z);
const uchar4 d = int_to_uchar4(data.w);
int result[16];
result[0] = a.x;
result[1] = result[0] + a.y;
result[2] = result[1] + a.z;
result[3] = result[2] + a.w;
result[4] = result[3] + b.x;
result[5] = result[4] + b.y;
result[6] = result[5] + b.z;
result[7] = result[6] + b.w;
result[8] = result[7] + c.x;
result[9] = result[8] + c.y;
result[10] = result[9] + c.z;
result[11] = result[10] + c.w;
result[12] = result[11] + d.x;
result[13] = result[12] + d.y;
result[14] = result[13] + d.z;
result[15] = result[14] + d.w;
int sum = result[15];
// the prefix sum for each thread's 16 values is computed,
// now the final sums (result[15]) need to be shared
// with the other threads and added. To do this,
// the __shfl_up() instruction is used and a shuffle scan
// operation is performed to distribute the sums to the correct
// threads
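// For example, with four lanes holding per-thread totals {a, b, c, d}:
//   i = 1: lanes 1..3 add the value from one lane below  -> {a, a+b, b+c, c+d}
//   i = 2: lanes 2..3 add the value from two lanes below -> {a, a+b, a+b+c, a+b+c+d}
// leaving each lane with the inclusive prefix sum of the warp totals.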
#pragma unroll
for (int i = 1; i < 32; i *= 2)
{
const int n = __shfl_up(sum, i, 32);
if (lane_id >= i)
{
#pragma unroll
for (int i = 0; i < 16; ++i)
result[i] += n;
sum += n;
}
}
// Now the final sum for the warp must be shared
// between warps. This is done by each warp
// having a thread store to shared memory, then
// having some other warp load the values and
// compute a prefix sum, again by using __shfl_up.
// The results are uniformly added back to the warps.
// The last thread in each warp, which holds the warp's sum,
// places it in shared memory
if (threadIdx.x % warpSize == warpSize - 1)
sums[warp_id] = result[15];
__syncthreads();
if (warp_id == 0)
{
int warp_sum = sums[lane_id];
#pragma unroll
for (int i = 1; i <= 32; i *= 2)
{
const int n = __shfl_up(warp_sum, i, 32);
if (lane_id >= i)
warp_sum += n;
}
sums[lane_id] = warp_sum;
}
__syncthreads();
int blockSum = 0;
// fold in the cumulative sum of the preceding warps
if (warp_id > 0)
{
blockSum = sums[warp_id - 1];
#pragma unroll
for (int i = 0; i < 16; ++i)
result[i] += blockSum;
}
// assemble result
// Each thread has 16 values to write, which are
// now integer data (to avoid overflow). Instead of
// each thread writing consecutive uint4s, the
// approach shown here uses the shuffle command
// to reformat the data inside the registers so that
// each thread holds consecutive data to be written, so larger contiguous
// segments can be assembled for writing.
/*
For example data that needs to be written as
GMEM[16] <- x0 x1 x2 x3 y0 y1 y2 y3 z0 z1 z2 z3 w0 w1 w2 w3
but is stored in registers (r0..r3), in four threads (0..3) as:
threadId 0 1 2 3
r0 x0 y0 z0 w0
r1 x1 y1 z1 w1
r2 x2 y2 z2 w2
r3 x3 y3 z3 w3
after applying __shfl_xor operations to move data between registers r1..r3:
threadId 00 01 10 11
x0 y0 z0 w0
xor(01)->y1 x1 w1 z1
xor(10)->z2 w2 x2 y2
xor(11)->w3 z3 y3 x3
and now x0..x3, and z0..z3 can be written out in order by all threads.
In the current code, each register above is actually representing
four integers to be written as uint4's to GMEM.
*/
result[4] = __shfl_xor(result[4] , 1, 32);
result[5] = __shfl_xor(result[5] , 1, 32);
result[6] = __shfl_xor(result[6] , 1, 32);
result[7] = __shfl_xor(result[7] , 1, 32);
result[8] = __shfl_xor(result[8] , 2, 32);
result[9] = __shfl_xor(result[9] , 2, 32);
result[10] = __shfl_xor(result[10], 2, 32);
result[11] = __shfl_xor(result[11], 2, 32);
result[12] = __shfl_xor(result[12], 3, 32);
result[13] = __shfl_xor(result[13], 3, 32);
result[14] = __shfl_xor(result[14], 3, 32);
result[15] = __shfl_xor(result[15], 3, 32);
uint4* integral_row = integral.ptr(blockIdx.x);
uint4 output;
///////
if (threadIdx.x % 4 == 0)
output = make_uint4(result[0], result[1], result[2], result[3]);
if (threadIdx.x % 4 == 1)
output = make_uint4(result[4], result[5], result[6], result[7]);
if (threadIdx.x % 4 == 2)
output = make_uint4(result[8], result[9], result[10], result[11]);
if (threadIdx.x % 4 == 3)
output = make_uint4(result[12], result[13], result[14], result[15]);
integral_row[threadIdx.x % 4 + (threadIdx.x / 4) * 16] = output;
///////
if (threadIdx.x % 4 == 2)
output = make_uint4(result[0], result[1], result[2], result[3]);
if (threadIdx.x % 4 == 3)
output = make_uint4(result[4], result[5], result[6], result[7]);
if (threadIdx.x % 4 == 0)
output = make_uint4(result[8], result[9], result[10], result[11]);
if (threadIdx.x % 4 == 1)
output = make_uint4(result[12], result[13], result[14], result[15]);
integral_row[(threadIdx.x + 2) % 4 + (threadIdx.x / 4) * 16 + 8] = output;
// continuing from the above example,
// this use of __shfl_xor() places the y0..y3 and w0..w3 data
// in order.
#pragma unroll
for (int i = 0; i < 16; ++i)
result[i] = __shfl_xor(result[i], 1, 32);
if (threadIdx.x % 4 == 0)
output = make_uint4(result[0], result[1], result[2], result[3]);
if (threadIdx.x % 4 == 1)
output = make_uint4(result[4], result[5], result[6], result[7]);
if (threadIdx.x % 4 == 2)
output = make_uint4(result[8], result[9], result[10], result[11]);
if (threadIdx.x % 4 == 3)
output = make_uint4(result[12], result[13], result[14], result[15]);
integral_row[threadIdx.x % 4 + (threadIdx.x / 4) * 16 + 4] = output;
///////
if (threadIdx.x % 4 == 2)
output = make_uint4(result[0], result[1], result[2], result[3]);
if (threadIdx.x % 4 == 3)
output = make_uint4(result[4], result[5], result[6], result[7]);
if (threadIdx.x % 4 == 0)
output = make_uint4(result[8], result[9], result[10], result[11]);
if (threadIdx.x % 4 == 1)
output = make_uint4(result[12], result[13], result[14], result[15]);
integral_row[(threadIdx.x + 2) % 4 + (threadIdx.x / 4) * 16 + 12] = output;
#endif
}
// This kernel computes columnwise prefix sums. When the data input is
// the row sums from above, this completes the integral image.
// The approach here is to have each block compute a local set of sums.
// First, the data covered by the block is loaded into shared memory,
// then instead of performing a sum in shared memory using __syncthreads
// between stages, the data is reformatted so that the necessary sums
// occur inside warps and the shuffle scan operation is used.
// The final set of sums from the block is then propagated, with the block
// computing "down" the image and adding the running sum to the local
// block sums.
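// For example, the 2x2 image [[1, 2], [3, 4]] has row sums [[1, 3], [3, 7]];
// columnwise prefix sums of those give [[1, 3], [4, 10]], the inclusive integral image.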
__global__ void shfl_integral_vertical(DevMem2D_<unsigned int> integral)
{
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 300)
__shared__ unsigned int sums[32][9];
const int tidx = blockIdx.x * blockDim.x + threadIdx.x;
const int lane_id = tidx % 8;
if (tidx >= integral.cols)
return;
sums[threadIdx.x][threadIdx.y] = 0;
__syncthreads();
unsigned int stepSum = 0;
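// stepSum carries the running column total from one blockDim.y-high strip of rows to the next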
for (int y = threadIdx.y; y < integral.rows; y += blockDim.y)
{
unsigned int* p = integral.ptr(y) + tidx;
unsigned int sum = *p;
sums[threadIdx.x][threadIdx.y] = sum;
__syncthreads();
// place into SMEM
// shfl-scan reduce the SMEM, reformatting so the column
// sums are computed in a warp
// then read out properly
const int j = threadIdx.x % 8;
const int k = threadIdx.x / 8 + threadIdx.y * 4;
int partial_sum = sums[k][j];
for (int i = 1; i <= 8; i *= 2)
{
int n = __shfl_up(partial_sum, i, 32);
if (lane_id >= i)
partial_sum += n;
}
sums[k][j] = partial_sum;
__syncthreads();
if (threadIdx.y > 0)
sum += sums[threadIdx.x][threadIdx.y - 1];
sum += stepSum;
stepSum += sums[threadIdx.x][blockDim.y - 1];
__syncthreads();
*p = sum;
}
#endif
}
void shfl_integral_gpu(DevMem2Db img, DevMem2D_<unsigned int> integral, cudaStream_t stream)
{
{
// each thread handles 16 values, use 1 block/row
const int block = img.cols / 16;
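// (this assumes img.cols is a multiple of 16, since each thread consumes one 16-byte uint4)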
// launch 1 block / row
const int grid = img.rows;
cudaSafeCall( cudaFuncSetCacheConfig(shfl_integral_horizontal, cudaFuncCachePreferL1) );
shfl_integral_horizontal<<<grid, block, 0, stream>>>((DevMem2D_<uint4>) img, (DevMem2D_<uint4>) integral);
cudaSafeCall( cudaGetLastError() );
}
{
const dim3 block(32, 8);
const dim3 grid(divUp(integral.cols, block.x), 1);
shfl_integral_vertical<<<grid, block, 0, stream>>>(integral);
cudaSafeCall( cudaGetLastError() );
}
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
}
}}}

View File

@ -279,7 +279,7 @@ namespace cv { namespace gpu { namespace device
rect.z = __float2int_rn(windowW * scale);
rect.w = __float2int_rn(windowH * scale);
int res = Emulation::smem::atomicInc(classified, (unsigned int)objects.cols);
int res = atomicInc(classified, (unsigned int)objects.cols);
objects(0, res) = rect;
}
}

View File

@ -215,7 +215,7 @@ namespace cv { namespace gpu { namespace device
maxval[blockIdx.y * gridDim.x + blockIdx.x] = (T)smaxval[0];
}
#if __CUDA_ARCH__ >= 110
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
__shared__ bool is_last;
if (tid == 0)
@ -535,7 +535,7 @@ namespace cv { namespace gpu { namespace device
findMinMaxLocInSmem<nthreads, best_type>(sminval, smaxval, sminloc, smaxloc, tid);
#if __CUDA_ARCH__ >= 110
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
__shared__ bool is_last;
if (tid == 0)
@ -841,7 +841,7 @@ namespace cv { namespace gpu { namespace device
sumInSmem<nthreads, uint>(scount, tid);
#if __CUDA_ARCH__ >= 110
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
__shared__ bool is_last;
if (tid == 0)
@ -1034,7 +1034,7 @@ namespace cv { namespace gpu { namespace device
sumInSmem<nthreads, R>(smem, tid);
#if __CUDA_ARCH__ >= 110
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
__shared__ bool is_last;
if (tid == 0)
@ -1115,7 +1115,7 @@ namespace cv { namespace gpu { namespace device
sumInSmem<nthreads, R>(smem, tid);
sumInSmem<nthreads, R>(smem + nthreads, tid);
#if __CUDA_ARCH__ >= 110
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
__shared__ bool is_last;
if (tid == 0)
@ -1222,7 +1222,7 @@ namespace cv { namespace gpu { namespace device
sumInSmem<nthreads, R>(smem + nthreads, tid);
sumInSmem<nthreads, R>(smem + 2 * nthreads, tid);
#if __CUDA_ARCH__ >= 110
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 110
__shared__ bool is_last;
if (tid == 0)
@ -1339,7 +1339,7 @@ namespace cv { namespace gpu { namespace device
sumInSmem<nthreads, R>(smem + 2 * nthreads, tid);
sumInSmem<nthreads, R>(smem + 3 * nthreads, tid);
#if __CUDA_ARCH__ >= 110
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
__shared__ bool is_last;
if (tid == 0)
@ -1975,7 +1975,7 @@ namespace cv { namespace gpu { namespace device
for (int c = 0; c < cn; ++c)
myVal[c] = op.startValue();
#if __CUDA_ARCH__ >= 200
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 200
// For cc >= 2.0 prefer L1 cache
for (int x = threadIdx.x; x < src.cols; x += 256)

View File

@ -82,7 +82,7 @@ namespace cv { namespace gpu { namespace device
smem3[tid] = val3;
__syncthreads();
#if __CUDA_ARCH__ > 110
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ > 110)
if (tid < 128)
{
smem1[tid] = val1 += smem1[tid + 128];
@ -138,7 +138,7 @@ namespace cv { namespace gpu { namespace device
smem2[tid] = val2;
__syncthreads();
#if __CUDA_ARCH__ > 110
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ > 110)
if (tid < 128)
{
smem1[tid] = val1 += smem1[tid + 128];
@ -184,7 +184,7 @@ namespace cv { namespace gpu { namespace device
smem1[tid] = val1;
__syncthreads();
#if __CUDA_ARCH__ > 110
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ > 110)
if (tid < 128)
{
smem1[tid] = val1 += smem1[tid + 128];
@ -271,7 +271,7 @@ namespace cv { namespace gpu { namespace device
template <int cn, int PATCH_X, int PATCH_Y, bool calcErr>
__global__ void lkSparse(const float2* prevPts, float2* nextPts, uchar* status, float* err, const int level, const int rows, const int cols)
{
#if __CUDA_ARCH__ <= 110
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ <= 110)
__shared__ float smem1[128];
__shared__ float smem2[128];
__shared__ float smem3[128];

View File

@@ -64,7 +64,7 @@ namespace cv { namespace gpu { namespace device
template <int KSIZE, typename T, typename D, typename B>
__global__ void linearRowFilter(const DevMem2D_<T> src, PtrStep<D> dst, const int anchor, const B brd)
{
#if __CUDA_ARCH__ >= 200
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
const int BLOCK_DIM_X = 32;
const int BLOCK_DIM_Y = 8;
const int PATCH_PER_BLOCK = 4;

View File

@@ -0,0 +1,92 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_GPU_TEXTURE_BINDER_HPP_
#define OPENCV_GPU_TEXTURE_BINDER_HPP_
#include "opencv2/gpu/devmem2d.hpp"
#include <safe_call.hpp>
namespace cv
{
namespace gpu
{
class TextureBinder
{
public:
template<class T, enum cudaTextureReadMode readMode>
TextureBinder(const PtrStepSz<T>& arr, const struct texture<T, 2, readMode>& tex) : texref(&tex)
{
cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
cudaSafeCall( cudaBindTexture2D(0, tex, arr.data, desc, arr.cols, arr.rows, arr.step) );
}
template<class T, enum cudaTextureReadMode readMode>
TextureBinder(const PtrSz<T>& arr, const struct texture<T, 1, readMode> &tex) : texref(&tex)
{
cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
cudaSafeCall( cudaBindTexture(0, tex, arr.data, desc, arr.size * arr.elemSize()) );
}
template<class A, class T, enum cudaTextureReadMode readMode>
TextureBinder(const A& arr, const struct texture<T, 2, readMode>& tex, const cudaChannelFormatDesc& desc) : texref(&tex)
{
cudaSafeCall( cudaBindTexture2D(0, tex, arr.data, desc, arr.cols, arr.rows, arr.step) );
}
~TextureBinder()
{
cudaSafeCall( cudaUnbindTexture(texref) );
}
private:
const struct textureReference *texref;
};
}
namespace device
{
using pcl::gpu::TextureBinder;
}
}
#endif /* OPENCV_GPU_TEXTURE_BINDER_HPP_*/
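
The new texture_binder.hpp above is an RAII helper: the constructor binds a texture reference to a device buffer with cudaBindTexture/cudaBindTexture2D and the destructor unbinds it, so the binding lives exactly as long as the binder object. A hedged usage sketch (tex_src, copyThroughTexture and runCopy are hypothetical names, not part of the header):

// Sketch only: bind a 2D texture around a kernel launch using the helper above.
#include "opencv2/gpu/devmem2d.hpp"
#include "texture_binder.hpp"   // the header added above

texture<uchar, 2, cudaReadModeElementType> tex_src;

__global__ void copyThroughTexture(cv::gpu::PtrStepSz<uchar> dst)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x < dst.cols && y < dst.rows)
        dst(y, x) = tex2D(tex_src, x, y);
}

void runCopy(const cv::gpu::PtrStepSz<uchar>& src, cv::gpu::PtrStepSz<uchar>& dst)
{
    cv::gpu::TextureBinder binder(src, tex_src);   // binds tex_src to src
    const dim3 block(32, 8);
    const dim3 grid((dst.cols + block.x - 1) / block.x, (dst.rows + block.y - 1) / block.y);
    copyThroughTexture<<<grid, block>>>(dst);
    cudaSafeCall( cudaGetLastError() );
}   // tex_src is unbound here, when binder goes out of scope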

View File

@@ -48,7 +48,7 @@ void cv::gpu::graphcut(GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, Gpu
void cv::gpu::graphcut(GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::connectivityMask(const GpuMat&, GpuMat&, const cv::Scalar&, const cv::Scalar&, Stream&) { throw_nogpu(); }
void cv::gpu::labelComponents(const GpuMat& mask, GpuMat& components, int, Stream& stream) { throw_nogpu(); }
void cv::gpu::labelComponents(const GpuMat&, GpuMat&, int, Stream&) { throw_nogpu(); }
#else /* !defined (HAVE_CUDA) */

View File

@@ -315,7 +315,7 @@ void cv::gpu::HOGDescriptor::computeConfidenceMultiScale(const GpuMat& img, vect
double scale = 1.;
int levels = 0;
for (levels = 0; levels < conf_out.size(); levels++)
for (levels = 0; levels < (int)conf_out.size(); levels++)
{
scale = conf_out[levels].scale;
level_scale.push_back(scale);
@@ -332,8 +332,8 @@ void cv::gpu::HOGDescriptor::computeConfidenceMultiScale(const GpuMat& img, vect
for (size_t i = 0; i < level_scale.size(); i++)
{
double scale = level_scale[i];
Size sz(cvRound(img.cols / scale), cvRound(img.rows / scale));
double _scale = level_scale[i];
Size sz(cvRound(img.cols / _scale), cvRound(img.rows / _scale));
GpuMat smaller_img;
if (sz == img.size())

View File

@@ -57,11 +57,27 @@ namespace cv { namespace gpu { namespace device
namespace hough
{
int buildPointList_gpu(DevMem2Db src, unsigned int* list);
void linesAccum_gpu(const unsigned int* list, int count, DevMem2Di accum, float rho, float theta, size_t sharedMemPerBlock, bool has20);
int linesGetResult_gpu(DevMem2Di accum, float2* out, int* votes, int maxSize, float rho, float theta, float threshold, bool doSort);
}
}}}
//////////////////////////////////////////////////////////
// HoughLines
void cv::gpu::HoughLines(const GpuMat& src, GpuMat& lines, float rho, float theta, int threshold, bool doSort, int maxLines)
{
GpuMat accum, buf;
HoughLines(src, lines, accum, buf, rho, theta, threshold, doSort, maxLines);
}
void cv::gpu::HoughLines(const GpuMat& src, GpuMat& lines, GpuMat& accum, GpuMat& buf, float rho, float theta, int threshold, bool doSort, int maxLines)
{
HoughLinesTransform(src, accum, buf, rho, theta);
HoughLinesGet(accum, lines, rho, theta, threshold, doSort, maxLines);
}
void cv::gpu::HoughLinesTransform(const GpuMat& src, GpuMat& accum, GpuMat& buf, float rho, float theta)
{
using namespace cv::gpu::device::hough;
@@ -80,23 +96,23 @@ void cv::gpu::HoughLinesTransform(const GpuMat& src, GpuMat& accum, GpuMat& buf,
CV_Assert(numangle > 0 && numrho > 0);
ensureSizeIsEnough(numangle + 2, numrho + 2, CV_32SC1, accum);
accum.setTo(cv::Scalar::all(0));
accum.setTo(Scalar::all(0));
cv::gpu::DeviceInfo devInfo;
DeviceInfo devInfo;
if (count > 0)
linesAccum_gpu(buf.ptr<unsigned int>(), count, accum, rho, theta, devInfo.sharedMemPerBlock(), devInfo.supports(cv::gpu::FEATURE_SET_COMPUTE_20));
linesAccum_gpu(buf.ptr<unsigned int>(), count, accum, rho, theta, devInfo.sharedMemPerBlock(), devInfo.supports(FEATURE_SET_COMPUTE_20));
}
void cv::gpu::HoughLinesGet(const GpuMat& accum, GpuMat& lines, float rho, float theta, int threshold, bool doSort, int maxLines)
{
using namespace cv::gpu::device;
using namespace cv::gpu::device::hough;
CV_Assert(accum.type() == CV_32SC1);
ensureSizeIsEnough(2, maxLines, CV_32FC2, lines);
int count = hough::linesGetResult_gpu(accum, lines.ptr<float2>(0), lines.ptr<int>(1), maxLines, rho, theta, threshold, doSort);
int count = linesGetResult_gpu(accum, lines.ptr<float2>(0), lines.ptr<int>(1), maxLines, rho, theta, threshold, doSort);
if (count > 0)
lines.cols = count;
@@ -104,18 +120,6 @@ void cv::gpu::HoughLinesGet(const GpuMat& accum, GpuMat& lines, float rho, float
lines.release();
}
void cv::gpu::HoughLines(const GpuMat& src, GpuMat& lines, float rho, float theta, int threshold, bool doSort, int maxLines)
{
cv::gpu::GpuMat accum, buf;
HoughLines(src, lines, accum, buf, rho, theta, threshold, doSort, maxLines);
}
void cv::gpu::HoughLines(const GpuMat& src, GpuMat& lines, GpuMat& accum, GpuMat& buf, float rho, float theta, int threshold, bool doSort, int maxLines)
{
HoughLinesTransform(src, accum, buf, rho, theta);
HoughLinesGet(accum, lines, rho, theta, threshold, doSort, maxLines);
}
void cv::gpu::HoughLinesDownload(const GpuMat& d_lines, OutputArray h_lines_, OutputArray h_votes_)
{
if (d_lines.empty())
@@ -129,14 +133,14 @@ void cv::gpu::HoughLinesDownload(const GpuMat& d_lines, OutputArray h_lines_, Ou
CV_Assert(d_lines.rows == 2 && d_lines.type() == CV_32FC2);
h_lines_.create(1, d_lines.cols, CV_32FC2);
cv::Mat h_lines = h_lines_.getMat();
Mat h_lines = h_lines_.getMat();
d_lines.row(0).download(h_lines);
if (h_votes_.needed())
{
h_votes_.create(1, d_lines.cols, CV_32SC1);
cv::Mat h_votes = h_votes_.getMat();
cv::gpu::GpuMat d_votes(1, d_lines.cols, CV_32SC1, const_cast<int*>(d_lines.ptr<int>(1)));
Mat h_votes = h_votes_.getMat();
GpuMat d_votes(1, d_lines.cols, CV_32SC1, const_cast<int*>(d_lines.ptr<int>(1)));
d_votes.download(h_votes);
}
}
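
The reshuffled hough.cpp above exposes a small pipeline: HoughLinesTransform fills the accumulator, HoughLinesGet extracts (rho, theta) pairs (optionally sorted by votes), the two convenience HoughLines overloads wrap both steps, and HoughLinesDownload copies the packed result back to the host. A hedged end-to-end sketch (the input file name is a placeholder; the input is expected to be a binary 8-bit edge map):

// Sketch only: GPU Hough transform round trip with the API shown above.
#include <opencv2/gpu/gpu.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <iostream>
#include <vector>

int main()
{
    cv::Mat edges = cv::imread("edges.png", 0);          // hypothetical 8UC1 edge image
    cv::gpu::GpuMat d_edges(edges), d_lines;

    // rho/theta resolution, vote threshold, sort by votes, cap on returned lines
    cv::gpu::HoughLines(d_edges, d_lines, 1.0f, (float)(CV_PI / 180.0), 100, true, 4096);

    std::vector<cv::Vec2f> lines;
    cv::gpu::HoughLinesDownload(d_lines, lines);          // row 0 of d_lines holds (rho, theta)

    for (size_t i = 0; i < lines.size(); ++i)
        std::cout << "rho = " << lines[i][0] << ", theta = " << lines[i][1] << std::endl;
    return 0;
}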

View File

@@ -261,6 +261,12 @@ namespace
}
}
#if defined __GNUC__ && __GNUC__ > 2 && __GNUC_MINOR__ > 4
typedef Npp32s __attribute__((__may_alias__)) Npp32s_a;
#else
typedef Npp32s Npp32s_a;
#endif
void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, int borderType, const Scalar& value, Stream& s)
{
CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
@@ -308,7 +314,7 @@ void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom
case CV_32FC1:
{
Npp32f val = saturate_cast<Npp32f>(value[0]);
Npp32s nVal = *(reinterpret_cast<Npp32s*>(&val));
Npp32s nVal = *(reinterpret_cast<Npp32s_a*>(&val));
nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), srcsz,
dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );
break;
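
The Npp32s_a typedef introduced above (and the matching Ncv32f_a / Ncv32u_a typedefs later in this commit) tags the integer type with GCC's __may_alias__ attribute, so reinterpreting the bits of a float through it no longer falls foul of strict-aliasing optimizations; on other compilers it degrades to a plain typedef. A small hedged sketch of the same idiom outside the NPP types (floatBits is a hypothetical helper):

// Sketch only: strict-aliasing-safe bit reinterpretation, mirroring Npp32s_a above.
#if defined __GNUC__ && __GNUC__ > 2 && __GNUC_MINOR__ > 4
typedef int __attribute__((__may_alias__)) int32_a;
#else
typedef int int32_a;
#endif

static int floatBits(float v)
{
    return *reinterpret_cast<int32_a*>(&v);   // int32_a is allowed to alias float
}

A memcpy of the four bytes into an int is the fully portable alternative when the attribute is unavailable.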
@@ -527,11 +533,65 @@ void cv::gpu::integral(const GpuMat& src, GpuMat& sum, Stream& s)
integralBuffered(src, sum, buffer, s);
}
namespace cv { namespace gpu { namespace device
{
namespace imgproc
{
void shfl_integral_gpu(DevMem2Db img, DevMem2D_<unsigned int> integral, cudaStream_t stream);
}
}}}
void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, Stream& s)
{
CV_Assert(src.type() == CV_8UC1);
if (sum.cols != src.cols + 1 && sum.rows != src.rows + 1)
sum.create(src.rows + 1, src.cols + 1, CV_32S);
cudaStream_t stream = StreamAccessor::getStream(s);
DeviceInfo info;
if (info.supports(WARP_SHUFFLE_FUNCTIONS))
{
GpuMat src16;
if (src.cols % 16 == 0)
src16 = src;
else
{
ensureSizeIsEnough(src.rows, ((src.cols + 15) / 16) * 16, src.type(), buffer);
GpuMat inner = buffer(Rect(0, 0, src.cols, src.rows));
if (s)
{
s.enqueueMemSet(buffer, Scalar::all(0));
s.enqueueCopy(src, inner);
}
else
{
buffer.setTo(Scalar::all(0));
src.copyTo(inner);
}
src16 = buffer;
}
sum.create(src16.rows + 1, src16.cols + 1, CV_32SC1);
if (s)
s.enqueueMemSet(sum, Scalar::all(0));
else
sum.setTo(Scalar::all(0));
GpuMat inner = sum(Rect(1, 1, src16.cols, src16.rows));
cv::gpu::device::imgproc::shfl_integral_gpu(src16, inner, stream);
if (src16.cols != src.cols)
sum = sum(Rect(0, 0, src.cols + 1, src.rows + 1));
}
else
{
sum.create(src.rows + 1, src.cols + 1, CV_32SC1);
NcvSize32u roiSize;
roiSize.width = src.cols;
@@ -544,7 +604,6 @@ void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, S
ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );
ensureSizeIsEnough(1, bufSize, CV_8UC1, buffer);
cudaStream_t stream = StreamAccessor::getStream(s);
NppStStreamHandler h(stream);
@@ -553,6 +612,7 @@ void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, S
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
}
//////////////////////////////////////////////////////////////////////////////
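
The new integralBuffered branch above pads the source to a multiple of 16 columns, runs the warp-shuffle integral kernel when the device reports WARP_SHUFFLE_FUNCTIONS (compute capability 3.0 and up), and crops the (rows + 1) x (cols + 1) CV_32SC1 result back to the original size; older devices fall through to the NPP-staging path. A hedged host-side usage sketch (the file name is a placeholder):

// Sketch only: buffered GPU integral image, reusing the scratch buffer across calls.
#include <opencv2/gpu/gpu.hpp>
#include <opencv2/highgui/highgui.hpp>

int main()
{
    cv::Mat img = cv::imread("frame.png", 0);             // 8UC1 input expected
    cv::gpu::GpuMat d_img(img), d_sum, d_buf;

    cv::gpu::Stream stream;
    cv::gpu::integralBuffered(d_img, d_sum, d_buf, stream);
    stream.waitForCompletion();

    cv::Mat sum;
    d_sum.download(sum);                                   // (rows + 1) x (cols + 1), CV_32SC1
    return 0;
}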

View File

@@ -227,6 +227,7 @@ namespace
void matchTemplate_SQDIFF_32F(
const GpuMat& image, const GpuMat& templ, GpuMat& result, MatchTemplateBuf &buf, Stream& stream)
{
(void)buf;
result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
matchTemplateNaive_SQDIFF_32F(image, templ, result, image.channels(), StreamAccessor::getStream(stream));
}
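
The added (void)buf; above is the conventional no-op cast for a parameter kept only so the SQDIFF_32F variant matches the signature of the other matchTemplate helpers; it silences -Wunused-parameter on GCC and C4100 on MSVC without changing behaviour. A minimal sketch of the idiom (names are hypothetical):

// Sketch only: uniform signature, explicitly discarded argument.
static void processWithoutBuffer(const float* src, float* dst, int n, void* buf)
{
    (void)buf;                    // unused in this variant; the cast suppresses the warning
    for (int i = 0; i < n; ++i)
        dst[i] = src[i];
}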

View File

@@ -67,7 +67,11 @@
// Guaranteed size cross-platform classifier structures
//
//==============================================================================
#if defined __GNUC__ && __GNUC__ > 2 && __GNUC_MINOR__ > 4
typedef Ncv32f __attribute__((__may_alias__)) Ncv32f_a;
#else
typedef Ncv32f Ncv32f_a;
#endif
struct HaarFeature64
{
@@ -87,7 +91,7 @@ struct HaarFeature64
__host__ NCVStatus setWeight(Ncv32f weight)
{
((Ncv32f*)&(this->_ui2.y))[0] = weight;
((Ncv32f_a*)&(this->_ui2.y))[0] = weight;
return NCV_SUCCESS;
}
@@ -102,7 +106,7 @@ struct HaarFeature64
__device__ __host__ Ncv32f getWeight(void)
{
return *(Ncv32f*)(&this->_ui2.y);
return *(Ncv32f_a*)(&this->_ui2.y);
}
};
@@ -168,14 +172,13 @@ public:
}
};
struct HaarClassifierNodeDescriptor32
{
uint1 _ui1;
__host__ NCVStatus create(Ncv32f leafValue)
{
*(Ncv32f *)&this->_ui1 = leafValue;
*(Ncv32f_a *)&this->_ui1 = leafValue;
return NCV_SUCCESS;
}
@@ -187,7 +190,7 @@ struct HaarClassifierNodeDescriptor32
__host__ Ncv32f getLeafValueHost(void)
{
return *(Ncv32f *)&this->_ui1.x;
return *(Ncv32f_a *)&this->_ui1.x;
}
#ifdef __CUDACC__
@@ -203,6 +206,11 @@ struct HaarClassifierNodeDescriptor32
}
};
#if defined __GNUC__ && __GNUC__ > 2 && __GNUC_MINOR__ > 4
typedef Ncv32u __attribute__((__may_alias__)) Ncv32u_a;
#else
typedef Ncv32u Ncv32u_a;
#endif
struct HaarClassifierNode128
{
@@ -216,19 +224,19 @@ struct HaarClassifierNode128
__host__ NCVStatus setThreshold(Ncv32f t)
{
this->_ui4.y = *(Ncv32u *)&t;
this->_ui4.y = *(Ncv32u_a *)&t;
return NCV_SUCCESS;
}
__host__ NCVStatus setLeftNodeDesc(HaarClassifierNodeDescriptor32 nl)
{
this->_ui4.z = *(Ncv32u *)&nl;
this->_ui4.z = *(Ncv32u_a *)&nl;
return NCV_SUCCESS;
}
__host__ NCVStatus setRightNodeDesc(HaarClassifierNodeDescriptor32 nr)
{
this->_ui4.w = *(Ncv32u *)&nr;
this->_ui4.w = *(Ncv32u_a *)&nr;
return NCV_SUCCESS;
}
@@ -239,7 +247,7 @@ struct HaarClassifierNode128
__host__ __device__ Ncv32f getThreshold(void)
{
return *(Ncv32f*)&this->_ui4.y;
return *(Ncv32f_a*)&this->_ui4.y;
}
__host__ __device__ HaarClassifierNodeDescriptor32 getLeftNodeDesc(void)
@@ -264,7 +272,7 @@ struct HaarStage64
__host__ NCVStatus setStageThreshold(Ncv32f t)
{
this->_ui2.x = *(Ncv32u *)&t;
this->_ui2.x = *(Ncv32u_a *)&t;
return NCV_SUCCESS;
}
@@ -290,7 +298,7 @@ struct HaarStage64
__host__ __device__ Ncv32f getStageThreshold(void)
{
return *(Ncv32f*)&this->_ui2.x;
return *(Ncv32f_a*)&this->_ui2.x;
}
__host__ __device__ Ncv32u getStartClassifierRootNodeOffset(void)

View File

@@ -1423,7 +1423,7 @@ NCVStatus compactVector_32u_device(Ncv32u *d_src, Ncv32u srcLen,
(d_hierSums.ptr() + partSumOffsets[i],
partSumNums[i], NULL,
d_hierSums.ptr() + partSumOffsets[i+1],
NULL);
0);
}
else
{
@@ -1433,7 +1433,7 @@ NCVStatus compactVector_32u_device(Ncv32u *d_src, Ncv32u srcLen,
(d_hierSums.ptr() + partSumOffsets[i],
partSumNums[i], NULL,
NULL,
NULL);
0);
}
ncvAssertCUDALastErrorReturn(NPPST_CUDA_KERNEL_EXECUTION_ERROR);
@@ -1557,16 +1557,21 @@ NCVStatus nppsStCompact_32s(Ncv32s *d_src, Ncv32u srcLen,
}
#if defined __GNUC__ && __GNUC__ > 2 && __GNUC_MINOR__ > 4
typedef Ncv32u __attribute__((__may_alias__)) Ncv32u_a;
#else
typedef Ncv32u Ncv32u_a;
#endif
NCVStatus nppsStCompact_32f(Ncv32f *d_src, Ncv32u srcLen,
Ncv32f *d_dst, Ncv32u *p_dstLen,
Ncv32f elemRemove, Ncv8u *pBuffer,
Ncv32u bufSize, cudaDeviceProp &devProp)
{
return nppsStCompact_32u((Ncv32u *)d_src, srcLen, (Ncv32u *)d_dst, p_dstLen,
*(Ncv32u *)&elemRemove, pBuffer, bufSize, devProp);
*(Ncv32u_a *)&elemRemove, pBuffer, bufSize, devProp);
}
NCVStatus nppsStCompact_32u_host(Ncv32u *h_src, Ncv32u srcLen,
Ncv32u *h_dst, Ncv32u *dstLen, Ncv32u elemRemove)
{
@@ -1602,17 +1607,16 @@ NCVStatus nppsStCompact_32u_host(Ncv32u *h_src, Ncv32u srcLen,
NCVStatus nppsStCompact_32s_host(Ncv32s *h_src, Ncv32u srcLen,
Ncv32s *h_dst, Ncv32u *dstLen, Ncv32s elemRemove)
{
return nppsStCompact_32u_host((Ncv32u *)h_src, srcLen, (Ncv32u *)h_dst, dstLen, *(Ncv32u *)&elemRemove);
return nppsStCompact_32u_host((Ncv32u *)h_src, srcLen, (Ncv32u *)h_dst, dstLen, *(Ncv32u_a *)&elemRemove);
}
NCVStatus nppsStCompact_32f_host(Ncv32f *h_src, Ncv32u srcLen,
Ncv32f *h_dst, Ncv32u *dstLen, Ncv32f elemRemove)
{
return nppsStCompact_32u_host((Ncv32u *)h_src, srcLen, (Ncv32u *)h_dst, dstLen, *(Ncv32u *)&elemRemove);
return nppsStCompact_32u_host((Ncv32u *)h_src, srcLen, (Ncv32u *)h_dst, dstLen, *(Ncv32u_a *)&elemRemove);
}
//==============================================================================
//
// Filter.cu
@@ -2066,7 +2070,7 @@ NCVStatus nppiStInterpolateFrames(const NppStInterpolationState *pState)
//==============================================================================
#if __CUDA_ARCH__ < 200
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 200)
// FP32 atomic add
static __forceinline__ __device__ float _atomicAdd(float *addr, float val)
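
The guard above now compiles the FP32 _atomicAdd helper only in device passes targeting architectures below 2.0, where hardware atomicAdd on float is unavailable. A hedged sketch of the usual CAS-loop emulation such a helper relies on (the exact body in NPP_staging.cu may differ):

// Sketch only: emulate atomicAdd(float*, float) on pre-Fermi GPUs with atomicCAS.
static __forceinline__ __device__ float atomicAddEmulated(float* addr, float val)
{
    int* addr_as_int = (int*)addr;
    int old = *addr_as_int;
    int assumed;
    do
    {
        assumed = old;
        float updated = __int_as_float(assumed) + val;
        old = atomicCAS(addr_as_int, assumed, __float_as_int(updated));
    }
    while (assumed != old);       // retry if another thread updated the value first
    return __int_as_float(old);   // previous value, matching atomicAdd semantics
}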

View File

@@ -51,11 +51,11 @@ namespace cv { namespace gpu { namespace device
struct Emulation
{
static __device__ __forceinline__ int sycthOr(int pred)
static __device__ __forceinline__ int syncthreadsOr(int pred)
{
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 200)
// just compilation stub
return false;
return 0;
#else
return __syncthreads_or(pred);
#endif

View File

@@ -119,7 +119,6 @@ namespace
int depth = src.depth();
int num_channels = src.channels();
Size size = src.size();
if (depth == CV_64F)
{

View File

@@ -49,36 +49,36 @@ void cv::gpu::detail::VideoDecoder::create(const VideoReader_GPU::FormatInfo& vi
{
release();
cudaVideoCodec codec = static_cast<cudaVideoCodec>(videoFormat.codec);
cudaVideoChromaFormat chromaFormat = static_cast<cudaVideoChromaFormat>(videoFormat.chromaFormat);
cudaVideoCodec _codec = static_cast<cudaVideoCodec>(videoFormat.codec);
cudaVideoChromaFormat _chromaFormat = static_cast<cudaVideoChromaFormat>(videoFormat.chromaFormat);
cudaVideoCreateFlags videoCreateFlags = (codec == cudaVideoCodec_JPEG || codec == cudaVideoCodec_MPEG2) ?
cudaVideoCreateFlags videoCreateFlags = (_codec == cudaVideoCodec_JPEG || _codec == cudaVideoCodec_MPEG2) ?
cudaVideoCreate_PreferCUDA :
cudaVideoCreate_PreferCUVID;
// Validate video format. These are the currently supported formats via NVCUVID
CV_Assert(cudaVideoCodec_MPEG1 == codec ||
cudaVideoCodec_MPEG2 == codec ||
cudaVideoCodec_MPEG4 == codec ||
cudaVideoCodec_VC1 == codec ||
cudaVideoCodec_H264 == codec ||
cudaVideoCodec_JPEG == codec ||
cudaVideoCodec_YUV420== codec ||
cudaVideoCodec_YV12 == codec ||
cudaVideoCodec_NV12 == codec ||
cudaVideoCodec_YUYV == codec ||
cudaVideoCodec_UYVY == codec );
CV_Assert(cudaVideoCodec_MPEG1 == _codec ||
cudaVideoCodec_MPEG2 == _codec ||
cudaVideoCodec_MPEG4 == _codec ||
cudaVideoCodec_VC1 == _codec ||
cudaVideoCodec_H264 == _codec ||
cudaVideoCodec_JPEG == _codec ||
cudaVideoCodec_YUV420== _codec ||
cudaVideoCodec_YV12 == _codec ||
cudaVideoCodec_NV12 == _codec ||
cudaVideoCodec_YUYV == _codec ||
cudaVideoCodec_UYVY == _codec );
CV_Assert(cudaVideoChromaFormat_Monochrome == chromaFormat ||
cudaVideoChromaFormat_420 == chromaFormat ||
cudaVideoChromaFormat_422 == chromaFormat ||
cudaVideoChromaFormat_444 == chromaFormat);
CV_Assert(cudaVideoChromaFormat_Monochrome == _chromaFormat ||
cudaVideoChromaFormat_420 == _chromaFormat ||
cudaVideoChromaFormat_422 == _chromaFormat ||
cudaVideoChromaFormat_444 == _chromaFormat);
// Fill the decoder-create-info struct from the given video-format struct.
std::memset(&createInfo_, 0, sizeof(CUVIDDECODECREATEINFO));
// Create video decoder
createInfo_.CodecType = codec;
createInfo_.CodecType = _codec;
createInfo_.ulWidth = videoFormat.width;
createInfo_.ulHeight = videoFormat.height;
createInfo_.ulNumDecodeSurfaces = FrameQueue::MaximumSize;
@@ -87,7 +87,7 @@ void cv::gpu::detail::VideoDecoder::create(const VideoReader_GPU::FormatInfo& vi
while (createInfo_.ulNumDecodeSurfaces * videoFormat.width * videoFormat.height > 16 * 1024 * 1024)
createInfo_.ulNumDecodeSurfaces--;
createInfo_.ChromaFormat = chromaFormat;
createInfo_.ChromaFormat = _chromaFormat;
createInfo_.OutputFormat = cudaVideoSurfaceFormat_NV12;
createInfo_.DeinterlaceMode = cudaVideoDeinterlaceMode_Adaptive;

View File

@@ -39,7 +39,7 @@
//
//M*/
#include "precomp.hpp"
#include "test_precomp.hpp"
#ifdef HAVE_CUDA
@@ -49,76 +49,103 @@ using namespace cv::gpu;
using namespace cvtest;
using namespace testing;
void print_info()
void printOsInfo()
{
printf("\n");
#if defined _WIN32
# if defined _WIN64
puts("OS: Windows 64");
cout << "OS: Windows x64 \n" << endl;
# else
puts("OS: Windows 32");
cout << "OS: Windows x32 \n" << endl;
# endif
#elif defined linux
# if defined _LP64
puts("OS: Linux 64");
cout << "OS: Linux x64 \n" << endl;
# else
puts("OS: Linux 32");
cout << "OS: Linux x32 \n" << endl;
# endif
#elif defined __APPLE__
# if defined _LP64
puts("OS: Apple 64");
cout << "OS: Apple x64 \n" << endl;
# else
puts("OS: Apple 32");
cout << "OS: Apple x32 \n" << endl;
# endif
#endif
}
int deviceCount = getCudaEnabledDeviceCount();
void printCudaInfo()
{
#ifndef HAVE_CUDA
cout << "OpenCV was built without CUDA support \n" << endl;
#else
int driver;
cudaDriverGetVersion(&driver);
printf("CUDA Driver version: %d\n", driver);
printf("CUDA Runtime version: %d\n", CUDART_VERSION);
printf("CUDA device count: %d\n\n", deviceCount);
cout << "CUDA Driver version: " << driver << '\n';
cout << "CUDA Runtime version: " << CUDART_VERSION << '\n';
cout << endl;
cout << "GPU module was compiled for the following GPU archs:" << endl;
cout << " BIN: " << CUDA_ARCH_BIN << '\n';
cout << " PTX: " << CUDA_ARCH_PTX << '\n';
cout << endl;
int deviceCount = getCudaEnabledDeviceCount();
cout << "CUDA device count: " << deviceCount << '\n';
cout << endl;
for (int i = 0; i < deviceCount; ++i)
{
DeviceInfo info(i);
printf("Device %d:\n", i);
printf(" Name: %s\n", info.name().c_str());
printf(" Compute capability version: %d.%d\n", info.majorVersion(), info.minorVersion());
printf(" Total memory: %d Mb\n", static_cast<int>(static_cast<int>(info.totalMemory() / 1024.0) / 1024.0));
printf(" Free memory: %d Mb\n", static_cast<int>(static_cast<int>(info.freeMemory() / 1024.0) / 1024.0));
if (info.isCompatible())
puts(" This device is compatible with current GPU module build\n");
else
puts(" This device is NOT compatible with current GPU module build\n");
cout << "Device [" << i << "] \n";
cout << "\t Name: " << info.name() << '\n';
cout << "\t Compute capability: " << info.majorVersion() << '.' << info.minorVersion()<< '\n';
cout << "\t Multi Processor Count: " << info.multiProcessorCount() << '\n';
cout << "\t Total memory: " << static_cast<int>(static_cast<int>(info.totalMemory() / 1024.0) / 1024.0) << " Mb \n";
cout << "\t Free memory: " << static_cast<int>(static_cast<int>(info.freeMemory() / 1024.0) / 1024.0) << " Mb \n";
if (!info.isCompatible())
cout << "\t !!! This device is NOT compatible with current GPU module build \n";
cout << endl;
}
puts("GPU module was compiled for the following GPU archs:");
printf(" BIN: %s\n", CUDA_ARCH_BIN);
printf(" PTX: %s\n\n", CUDA_ARCH_PTX);
#endif
}
enum OutputLevel
{
OutputLevelNone,
OutputLevelCompact,
OutputLevelFull
};
extern OutputLevel nvidiaTestOutputLevel;
int main(int argc, char** argv)
{
TS::ptr()->init("gpu");
InitGoogleTest(&argc, argv);
try
{
CommandLineParser cmd(argc, (const char**)argv,
"{ print_info_only | print_info_only | false | Print information about system and exit }"
"{ device | device | -1 | Device on which tests will be executed (-1 means all devices) }"
"{ nvtest_output_level | nvtest_output_level | compact | NVidia test verbosity level }"
);
const char* keys ="{ nvtest_output_level | nvtest_output_level | compact | NVidia test verbosity level }";
printOsInfo();
printCudaInfo();
CommandLineParser parser(argc, (const char**)argv, keys);
if (cmd.get<bool>("print_info_only"))
return 0;
string outputLevel = parser.get<string>("nvtest_output_level", "none");
int device = cmd.get<int>("device");
if (device < 0)
{
DeviceManager::instance().loadAll();
cout << "Run tests on all supported devices \n" << endl;
}
else
{
DeviceManager::instance().load(device);
DeviceInfo info(device);
cout << "Run tests on device " << device << " [" << info.name() << "] \n" << endl;
}
string outputLevel = cmd.get<string>("nvtest_output_level");
if (outputLevel == "none")
nvidiaTestOutputLevel = OutputLevelNone;
@@ -127,9 +154,23 @@ int main(int argc, char** argv)
else if (outputLevel == "full")
nvidiaTestOutputLevel = OutputLevelFull;
print_info();
TS::ptr()->init("gpu");
InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
catch (const exception& e)
{
cerr << e.what() << endl;
return -1;
}
catch (...)
{
cerr << "Unknown error" << endl;
return -1;
}
return 0;
}
#else // HAVE_CUDA

View File

@@ -1,7 +1,7 @@
#ifndef __main_test_nvidia_h__
#define __main_test_nvidia_h__
#include<string>
#include <string>
enum OutputLevel
{
@@ -10,6 +10,8 @@ enum OutputLevel
OutputLevelFull
};
extern OutputLevel nvidiaTestOutputLevel;
bool nvidia_NPPST_Integral_Image(const std::string& test_data_path, OutputLevel outputLevel);
bool nvidia_NPPST_Squared_Integral_Image(const std::string& test_data_path, OutputLevel outputLevel);
bool nvidia_NPPST_RectStdDev(const std::string& test_data_path, OutputLevel outputLevel);

View File

@@ -245,8 +245,8 @@ bool TestHaarCascadeApplication::process()
int devId;
ncvAssertCUDAReturn(cudaGetDevice(&devId), false);
cudaDeviceProp devProp;
ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), false);
cudaDeviceProp _devProp;
ncvAssertCUDAReturn(cudaGetDeviceProperties(&_devProp, devId), false);
ncvStat = ncvApplyHaarClassifierCascade_device(
d_integralImage, d_rectStdDev, d_pixelMask,
@@ -254,7 +254,7 @@ bool TestHaarCascadeApplication::process()
haar, h_HaarStages, d_HaarStages, d_HaarNodes, d_HaarFeatures, false,
searchRoiU, 1, 1.0f,
*this->allocatorGPU.get(), *this->allocatorCPU.get(),
devProp, 0);
_devProp, 0);
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
NCVMatrixAlloc<Ncv32u> h_pixelMask_d(*this->allocatorCPU.get(), this->width, this->height);

View File

@@ -1,4 +1,6 @@
#pragma warning (disable : 4408 4201 4100)
#if defined _MSC_VER && _MSC_VER >= 1200
# pragma warning (disable : 4408 4201 4100)
#endif
#include <cstdio>

View File

@@ -39,7 +39,9 @@
//
//M*/
#include "precomp.hpp"
#include "test_precomp.hpp"
#ifdef HAVE_CUDA
namespace {
@@ -343,3 +345,5 @@ INSTANTIATE_TEST_CASE_P(GPU_Calib3D, ReprojectImageTo3D, testing::Combine(
WHOLE_SUBMAT));
} // namespace
#endif // HAVE_CUDA

View File

@@ -39,7 +39,7 @@
//
//M*/
#include "precomp.hpp"
#include "test_precomp.hpp"
#ifdef HAVE_CUDA

View File

@@ -39,7 +39,9 @@
//
//M*/
#include "precomp.hpp"
#include "test_precomp.hpp"
#ifdef HAVE_CUDA
namespace {
@@ -98,3 +100,5 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CopyMakeBorder, testing::Combine(
WHOLE_SUBMAT));
} // namespace
#endif // HAVE_CUDA

View File

@@ -39,7 +39,9 @@
//
//M*/
#include "precomp.hpp"
#include "test_precomp.hpp"
#ifdef HAVE_CUDA
namespace {
@@ -3396,3 +3398,5 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, Reduce, testing::Combine(
WHOLE_SUBMAT));
} // namespace
#endif // HAVE_CUDA

View File

@@ -39,7 +39,9 @@
//
//M*/
#include "precomp.hpp"
#include "test_precomp.hpp"
#ifdef HAVE_CUDA
namespace {
@@ -984,3 +986,5 @@ INSTANTIATE_TEST_CASE_P(GPU_Features2D, BruteForceMatcher, testing::Combine(
testing::Values(UseMask(false), UseMask(true))));
} // namespace
#endif // HAVE_CUDA

View File

@@ -39,7 +39,9 @@
//
//M*/
#include "precomp.hpp"
#include "test_precomp.hpp"
#ifdef HAVE_CUDA
namespace {
@@ -552,3 +554,5 @@ INSTANTIATE_TEST_CASE_P(GPU_Filter, Filter2D, testing::Combine(
WHOLE_SUBMAT));
} // namespace
#endif // HAVE_CUDA

Some files were not shown because too many files have changed in this diff.