Dynamic CUDA support library loading implemented for Linux.
Fixed a logical mistake in the CMake macro, reimplemented the DeviceInfo delegate, and applied build and warning fixes.
parent d4087f19a2
commit 8660e048bc
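The loader added to gpumat.cpp below follows the usual dlopen/dlsym plugin pattern: the CUDA-specific function tables live in a separate shared library (libopencv_core_cuda.so) that exports plain C factory functions, and the core module resolves them at run time, falling back to stub tables when the library is absent. A minimal standalone sketch of that pattern (the FactoryFn typedef and the printf diagnostics are illustrative and not taken from the commit; the library and symbol names are):

    // Sketch only: mirrors the dlopen/dlsym factory lookup added in gpumat.cpp.
    #include <dlfcn.h>
    #include <cstdio>

    typedef void* (*FactoryFn)();

    int main()
    {
        void* handle = dlopen("libopencv_core_cuda.so", RTLD_LAZY);
        if (!handle)
        {
            std::printf("CUDA support library not found, using built-in stubs\n");
            return 0;
        }

        // The commit exports "gpuFactory" and "deviceInfoFactory" with C linkage.
        FactoryFn gpuFactory = (FactoryFn)dlsym(handle, "gpuFactory");
        if (gpuFactory)
            gpuFactory(); // returns a pointer to a statically allocated function table

        dlclose(handle);
        return 0;
    }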
@@ -1,36 +1,76 @@
 set(the_description "The Core Functionality")
+
+macro(ocv_glob_module_sources_no_cuda)
+  file(GLOB_RECURSE lib_srcs "src/*.cpp")
+  file(GLOB_RECURSE lib_int_hdrs "src/*.hpp" "src/*.h")
+  file(GLOB lib_hdrs "include/opencv2/${name}/*.hpp" "include/opencv2/${name}/*.h")
+  file(GLOB lib_hdrs_detail "include/opencv2/${name}/detail/*.hpp" "include/opencv2/${name}/detail/*.h")
+
+  set(cuda_objs "")
+  set(lib_cuda_hdrs "")
+  if(HAVE_CUDA)
+    ocv_include_directories(${CUDA_INCLUDE_DIRS})
+    file(GLOB lib_cuda_hdrs "src/cuda/*.hpp")
+  endif()
+
+  source_group("Src" FILES ${lib_srcs} ${lib_int_hdrs})
+
+  file(GLOB cl_kernels "src/opencl/*.cl")
+  if(HAVE_opencv_ocl AND cl_kernels)
+    ocv_include_directories(${OPENCL_INCLUDE_DIRS})
+    add_custom_command(
+      OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.hpp"
+      COMMAND ${CMAKE_COMMAND} -DCL_DIR="${CMAKE_CURRENT_SOURCE_DIR}/src/opencl" -DOUTPUT="${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" -P "${OpenCV_SOURCE_DIR}/cmake/cl2cpp.cmake"
+      DEPENDS ${cl_kernels} "${OpenCV_SOURCE_DIR}/cmake/cl2cpp.cmake")
+    source_group("OpenCL" FILES ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.hpp")
+    list(APPEND lib_srcs ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.hpp")
+  endif()
+
+  source_group("Include" FILES ${lib_hdrs})
+  source_group("Include\\detail" FILES ${lib_hdrs_detail})
+
+  ocv_set_module_sources(${ARGN} HEADERS ${lib_hdrs} ${lib_hdrs_detail}
+                         SOURCES ${lib_srcs} ${lib_int_hdrs} ${cuda_objs} ${lib_cuda_hdrs})
+endmacro()
+
+ocv_add_module(core PRIVATE_REQUIRED ${ZLIB_LIBRARIES})
+ocv_module_include_directories(${ZLIB_INCLUDE_DIR})
+
 if(HAVE_WINRT)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /ZW /GS /Gm- /AI\"${WINDOWS_SDK_PATH}/References/CommonConfiguration/Neutral\" /AI\"${VISUAL_STUDIO_PATH}/vcpackages\"")
 endif()
+
+if(DYNAMIC_CUDA_SUPPORT)
+  add_definitions(-DDYNAMIC_CUDA_SUPPORT)
+else()
+  add_definitions(-DUSE_CUDA)
+endif()
+
+if(HAVE_CUDA)
+  ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpu/include")
+  ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef)
+endif()
 
 file(GLOB lib_cuda_hdrs "include/opencv2/${name}/cuda/*.hpp" "include/opencv2/${name}/cuda/*.h")
 file(GLOB lib_cuda_hdrs_detail "include/opencv2/${name}/cuda/detail/*.hpp" "include/opencv2/${name}/cuda/detail/*.h")
 
 source_group("Cuda Headers" FILES ${lib_cuda_hdrs})
 source_group("Cuda Headers\\Detail" FILES ${lib_cuda_hdrs_detail})
 
-if(DYNAMIC_CUDA_SUPPORT)
-  add_definitions(-DDYNAMIC_CUDA_SUPPORT)
+if (DYNAMIC_CUDA_SUPPORT)
+  ocv_glob_module_sources_no_cuda(SOURCES "${opencv_core_BINARY_DIR}/version_string.inc"
+                                  HEADERS ${lib_cuda_hdrs} ${lib_cuda_hdrs_detail})
+else()
+  ocv_glob_module_sources(SOURCES "${opencv_core_BINARY_DIR}/version_string.inc"
+                          HEADERS ${lib_cuda_hdrs} ${lib_cuda_hdrs_detail})
 endif()
 
-ocv_add_module(core PRIVATE_REQUIRED ${ZLIB_LIBRARIES})
-ocv_module_include_directories(${ZLIB_INCLUDE_DIR})
-
-if(HAVE_CUDA)
-  ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpu/include")
-  ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef)
-endif()
-
-ocv_glob_module_sources(SOURCES "${opencv_core_BINARY_DIR}/version_string.inc"
-                        HEADERS ${lib_cuda_hdrs} ${lib_cuda_hdrs_detail})
-
 ocv_create_module()
 ocv_add_precompiled_headers(${the_module})
 
 ocv_add_accuracy_tests()
 ocv_add_perf_tests()
 
-if(DYNAMIC_CUDA_SUPPORT)
+if (DYNAMIC_CUDA_SUPPORT)
   add_subdirectory(cuda)
 endif()
@@ -1,6 +1,5 @@
 project(opencv_core_cuda)
-set(HAVE_CUDA FALSE)
-add_definitions("-DHAVE_CUDA")
+add_definitions(-DUSE_CUDA)
 include_directories(${CUDA_INCLUDE_DIRS}
     "../src/"
     "../include/opencv2/core/"
@@ -1,6 +1,10 @@
+#include "cvconfig.h"
 #include "opencv2/core/core.hpp"
 #include "opencv2/core/gpumat.hpp"
 
+#include <stdio.h>
+#include <iostream>
+
 #ifdef HAVE_CUDA
 #include <cuda_runtime.h>
 #include <npp.h>
@@ -17,7 +21,30 @@
 #endif
 #endif
 
+using namespace std;
 using namespace cv;
 using namespace cv::gpu;
 
 #include "gpumat_cuda.hpp"
+
+#ifdef HAVE_CUDA
+static CudaDeviceInfoFuncTable deviceInfoTable;
+static CudaFuncTable gpuTable;
+#else
+static EmptyDeviceInfoFuncTable deviceInfoTable;
+static EmptyFuncTable gpuTable;
+#endif
+
+extern "C" {
+
+DeviceInfoFuncTable* deviceInfoFactory()
+{
+    return (DeviceInfoFuncTable*)&deviceInfoTable;
+}
+
+GpuFuncTable* gpuFactory()
+{
+    return (GpuFuncTable*)&gpuTable;
+}
+
+}
@@ -137,6 +137,9 @@ namespace cv { namespace gpu
         int deviceID() const { return device_id_; }
 
     private:
+        // The private section is fictive; it is kept only to preserve binary compatibility.
+        // Changes to the private fields here have no effect.
+        // See the delegate code.
         void query();
 
         int device_id_;
@@ -43,8 +43,9 @@
 #include "precomp.hpp"
 #include "opencv2/core/gpumat.hpp"
 #include <iostream>
+#include <dlfcn.h>
 
-#if defined(HAVE_CUDA)
+#if defined(HAVE_CUDA) || defined(DYNAMIC_CUDA_SUPPORT)
     #include <cuda_runtime.h>
     #include <npp.h>
 
@@ -66,15 +67,81 @@ using namespace cv::gpu;
 
 #include "gpumat_cuda.hpp"
 
-namespace
-{
-    const GpuFuncTable* gpuFuncTable()
-    {
-        static EmptyFuncTable funcTable;
-        return &funcTable;
-    }
-}
+typedef GpuFuncTable* (*GpuFactoryType)();
+typedef DeviceInfoFuncTable* (*DeviceInfoFactoryType)();
+
+static GpuFactoryType gpuFactory = NULL;
+static DeviceInfoFactoryType deviceInfoFactory = NULL;
+
+static const std::string getCudaSupportLibName()
+{
+    return "libopencv_core_cuda.so";
+}
+
+static bool loadCudaSupportLib()
+{
+    void* handle;
+    const std::string name = getCudaSupportLibName();
+    handle = dlopen(name.c_str(), RTLD_LAZY);
+    if (!handle)
+        return false;
+
+    deviceInfoFactory = (DeviceInfoFactoryType)dlsym(handle, "deviceInfoFactory");
+    if (!deviceInfoFactory)
+    {
+        dlclose(handle);
+        return false;
+    }
+
+    gpuFactory = (GpuFactoryType)dlsym(handle, "gpuFactory");
+    if (!gpuFactory)
+    {
+        dlclose(handle);
+        return false;
+    }
+
+    dlclose(handle);
+
+    return true;
+}
+
+static GpuFuncTable* gpuFuncTable()
+{
+#ifdef DYNAMIC_CUDA_SUPPORT
+    static EmptyFuncTable stub;
+    static GpuFuncTable* libFuncTable = loadCudaSupportLib() ? gpuFactory(): (GpuFuncTable*)&stub;
+    static GpuFuncTable *funcTable = libFuncTable ? libFuncTable : (GpuFuncTable*)&stub;
+#else
+# ifdef USE_CUDA
+    static CudaFuncTable impl;
+    static GpuFuncTable* funcTable = &impl;
+#else
+    static EmptyFuncTable stub;
+    static GpuFuncTable* funcTable = &stub;
+#endif
+#endif
+    return funcTable;
+}
+
+static DeviceInfoFuncTable* deviceInfoFuncTable()
+{
+#ifdef DYNAMIC_CUDA_SUPPORT
+    static EmptyDeviceInfoFuncTable stub;
+    static DeviceInfoFuncTable* libFuncTable = loadCudaSupportLib() ? deviceInfoFactory(): (DeviceInfoFuncTable*)&stub;
+    static DeviceInfoFuncTable* funcTable = libFuncTable ? libFuncTable : (DeviceInfoFuncTable*)&stub;
+#else
+# ifdef USE_CUDA
+    static CudaDeviceInfoFuncTable impl;
+    static DeviceInfoFuncTable* funcTable = &impl;
+#else
+    static EmptyFuncTable stub;
+    static DeviceInfoFuncTable* funcTable = &stub;
+#endif
+#endif
+    return funcTable;
+}
 
 //////////////////////////////// Initialization & Info ////////////////////////
 
 int cv::gpu::getCudaEnabledDeviceCount() { return gpuFuncTable()->getCudaEnabledDeviceCount(); }
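With this indirection in place, callers do not change at all: the public cv::gpu entry points route through gpuFuncTable()/deviceInfoFuncTable(), which hand back either the real CUDA tables or the empty stubs. A small usage sketch follows (it assumes a build of this branch; the expectation that the stub reports zero devices is an assumption, not stated in the diff):

    // Sketch only: exercising the table indirection from user code.
    #include "opencv2/core/gpumat.hpp"
    #include <iostream>

    int main()
    {
        // Expected to be 0 when only the EmptyFuncTable stub is available.
        int devices = cv::gpu::getCudaEnabledDeviceCount();
        std::cout << "CUDA devices: " << devices << std::endl;
        if (devices > 0)
            cv::gpu::printShortCudaDeviceInfo(0);
        return 0;
    }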
@@ -95,13 +162,13 @@ bool cv::gpu::TargetArchs::hasEqualOrGreater(int major, int minor) { return gpuF
 bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int major, int minor) { return gpuFuncTable()->hasEqualOrGreaterPtx(major, minor); }
 bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int major, int minor) { return gpuFuncTable()->hasEqualOrGreaterBin(major, minor); }
 
-size_t cv::gpu::DeviceInfo::sharedMemPerBlock() const { return gpuFuncTable()->sharedMemPerBlock(); }
-void cv::gpu::DeviceInfo::queryMemory(size_t& total_memory, size_t& free_memory) const { gpuFuncTable()->queryMemory(total_memory, free_memory); }
-size_t cv::gpu::DeviceInfo::freeMemory() const { return gpuFuncTable()->freeMemory(); }
-size_t cv::gpu::DeviceInfo::totalMemory() const { return gpuFuncTable()->totalMemory(); }
-bool cv::gpu::DeviceInfo::supports(FeatureSet feature_set) const { return gpuFuncTable()->supports(feature_set); }
-bool cv::gpu::DeviceInfo::isCompatible() const { return gpuFuncTable()->isCompatible(); }
-void cv::gpu::DeviceInfo::query() { gpuFuncTable()->query(); }
+size_t cv::gpu::DeviceInfo::sharedMemPerBlock() const { return deviceInfoFuncTable()->sharedMemPerBlock(); }
+void cv::gpu::DeviceInfo::queryMemory(size_t& total_memory, size_t& free_memory) const { deviceInfoFuncTable()->queryMemory(total_memory, free_memory); }
+size_t cv::gpu::DeviceInfo::freeMemory() const { return deviceInfoFuncTable()->freeMemory(); }
+size_t cv::gpu::DeviceInfo::totalMemory() const { return deviceInfoFuncTable()->totalMemory(); }
+bool cv::gpu::DeviceInfo::supports(FeatureSet feature_set) const { return deviceInfoFuncTable()->supports(feature_set); }
+bool cv::gpu::DeviceInfo::isCompatible() const { return deviceInfoFuncTable()->isCompatible(); }
+void cv::gpu::DeviceInfo::query() { deviceInfoFuncTable()->query(); }
 
 void cv::gpu::printCudaDeviceInfo(int device) { gpuFuncTable()->printCudaDeviceInfo(device); }
 void cv::gpu::printShortCudaDeviceInfo(int device) { gpuFuncTable()->printShortCudaDeviceInfo(device); }
@@ -556,7 +623,7 @@ namespace cv { namespace gpu
 
         void setTo(GpuMat& src, Scalar s, cudaStream_t stream)
         {
-            gpuFuncTable()->setTo(src, s, stream);
+            gpuFuncTable()->setTo(src, s, cv::gpu::GpuMat(), stream);
         }
 
         void setTo(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream)
@@ -1,30 +1,19 @@
-namespace
-{
-#if defined(HAVE_CUDA) && !defined(DYNAMIC_CUDA_SUPPORT)
-
-    #define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, CV_Func)
-    #define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__, CV_Func)
-
-    inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
-    {
-        if (cudaSuccess != err)
-            cv::gpu::error(cudaGetErrorString(err), file, line, func);
-    }
-
-    inline void ___nppSafeCall(int err, const char *file, const int line, const char *func = "")
-    {
-        if (err < 0)
-        {
-            std::ostringstream msg;
-            msg << "NPP API Call Error: " << err;
-            cv::gpu::error(msg.str().c_str(), file, line, func);
-        }
-    }
-#endif
-}
-
-namespace
-{
+#ifndef __GPUMAT_CUDA_HPP__
+#define __GPUMAT_CUDA_HPP__
+
+class DeviceInfoFuncTable
+{
+public:
+    virtual size_t sharedMemPerBlock() const = 0;
+    virtual void queryMemory(size_t&, size_t&) const = 0;
+    virtual size_t freeMemory() const = 0;
+    virtual size_t totalMemory() const = 0;
+    virtual bool supports(FeatureSet) const = 0;
+    virtual bool isCompatible() const = 0;
+    virtual void query() = 0;
+    virtual ~DeviceInfoFuncTable() {};
+};
+
 class GpuFuncTable
 {
 public:
@@ -40,6 +29,7 @@ namespace
 
     virtual bool deviceSupports(FeatureSet) const = 0;
 
+    // TargetArchs
     virtual bool builtWith(FeatureSet) const = 0;
     virtual bool has(int, int) const = 0;
     virtual bool hasPtx(int, int) const = 0;
@@ -49,14 +39,6 @@ namespace
     virtual bool hasEqualOrGreaterPtx(int, int) const = 0;
     virtual bool hasEqualOrGreaterBin(int, int) const = 0;
 
-    virtual size_t sharedMemPerBlock() const = 0;
-    virtual void queryMemory(size_t&, size_t&) const = 0;
-    virtual size_t freeMemory() const = 0;
-    virtual size_t totalMemory() const = 0;
-    virtual bool supports(FeatureSet) const = 0;
-    virtual bool isCompatible() const = 0;
-    virtual void query() const = 0;
-
     virtual void printCudaDeviceInfo(int) const = 0;
     virtual void printShortCudaDeviceInfo(int) const = 0;
 
@@ -72,17 +54,24 @@ namespace
     virtual void convert(const GpuMat& src, GpuMat& dst) const = 0;
 
     // for gpu::device::setTo funcs
-    virtual void setTo(cv::gpu::GpuMat&, cv::Scalar, CUstream_st*) const = 0;
     virtual void setTo(cv::gpu::GpuMat&, cv::Scalar, const cv::gpu::GpuMat&, CUstream_st*) const = 0;
 
     virtual void mallocPitch(void** devPtr, size_t* step, size_t width, size_t height) const = 0;
     virtual void free(void* devPtr) const = 0;
 };
-}
 
-#if !defined(HAVE_CUDA) || defined(DYNAMIC_CUDA_SUPPORT)
-namespace
-{
+class EmptyDeviceInfoFuncTable: public DeviceInfoFuncTable
+{
+public:
+    size_t sharedMemPerBlock() const { throw_nogpu; return 0; }
+    void queryMemory(size_t&, size_t&) const { throw_nogpu; }
+    size_t freeMemory() const { throw_nogpu; return 0; }
+    size_t totalMemory() const { throw_nogpu; return 0; }
+    bool supports(FeatureSet) const { throw_nogpu; return false; }
+    bool isCompatible() const { throw_nogpu; return false; }
+    void query() { throw_nogpu; }
+};
+
 class EmptyFuncTable : public GpuFuncTable
 {
 public:
@@ -105,15 +94,7 @@ namespace
     bool hasEqualOrGreater(int, int) const { throw_nogpu; return false; }
     bool hasEqualOrGreaterPtx(int, int) const { throw_nogpu; return false; }
     bool hasEqualOrGreaterBin(int, int) const { throw_nogpu; return false; }
 
-    size_t sharedMemPerBlock() const { throw_nogpu; return 0; }
-    void queryMemory(size_t&, size_t&) const { throw_nogpu; }
-    size_t freeMemory() const { throw_nogpu; return 0; }
-    size_t totalMemory() const { throw_nogpu; return 0; }
-    bool supports(FeatureSet) const { throw_nogpu; return false; }
-    bool isCompatible() const { throw_nogpu; return false; }
-    void query() const { throw_nogpu; }
-
     void printCudaDeviceInfo(int) const { throw_nogpu; }
     void printShortCudaDeviceInfo(int) const { throw_nogpu; }
 
@@ -126,15 +107,32 @@ namespace
     void convert(const GpuMat&, GpuMat&) const { throw_nogpu; }
     void convert(const GpuMat&, GpuMat&, double, double, cudaStream_t stream = 0) const { (void)stream; throw_nogpu; }
 
-    virtual void setTo(cv::gpu::GpuMat&, cv::Scalar, CUstream_st*) const { throw_nogpu; }
     virtual void setTo(cv::gpu::GpuMat&, cv::Scalar, const cv::gpu::GpuMat&, CUstream_st*) const { throw_nogpu; }
 
     void mallocPitch(void**, size_t*, size_t, size_t) const { throw_nogpu; }
     void free(void*) const {}
 };
 
+#if defined(USE_CUDA)
+
+#define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, CV_Func)
+#define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__, CV_Func)
+
+inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
+{
+    if (cudaSuccess != err)
+        cv::gpu::error(cudaGetErrorString(err), file, line, func);
 }
 
-#else
+inline void ___nppSafeCall(int err, const char *file, const int line, const char *func = "")
+{
+    if (err < 0)
+    {
+        std::ostringstream msg;
+        msg << "NPP API Call Error: " << err;
+        cv::gpu::error(msg.str().c_str(), file, line, func);
+    }
+}
+
 namespace cv { namespace gpu { namespace device
 {
@@ -149,8 +147,6 @@ namespace cv { namespace gpu { namespace device
     void convert_gpu(PtrStepSzb src, int sdepth, PtrStepSzb dst, int ddepth, double alpha, double beta, cudaStream_t stream);
 }}}
 
-namespace
-{
     template <typename T> void kernelSetCaller(GpuMat& src, Scalar s, cudaStream_t stream)
     {
         Scalar_<T> sf = s;
@@ -162,10 +158,7 @@ namespace
         Scalar_<T> sf = s;
         cv::gpu::device::set_to_gpu(src, sf.val, mask, src.channels(), stream);
     }
-}
 
-namespace
-{
     template<int n> struct NPPTypeTraits;
     template<> struct NPPTypeTraits<CV_8U> { typedef Npp8u npp_type; };
     template<> struct NPPTypeTraits<CV_8S> { typedef Npp8s npp_type; };
@@ -208,6 +201,7 @@ namespace
             cudaSafeCall( cudaDeviceSynchronize() );
         }
     };
+
     template<int DDEPTH, typename NppConvertFunc<CV_32F, DDEPTH>::func_ptr func> struct NppCvt<CV_32F, DDEPTH, func>
     {
         typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;
@@ -361,9 +355,8 @@ namespace
     {
         return reinterpret_cast<size_t>(ptr) % size == 0;
     }
-}
 
-namespace cv { namespace gpu { namespace devices
+namespace cv { namespace gpu { namespace device
 {
     void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream = 0)
     {
@@ -418,74 +411,52 @@ namespace
     {
         setTo(src, s, mask, 0);
     }
-}}
+}}}
 
-namespace
-{
-    class CudaFuncTable : public GpuFuncTable
-    {
-    protected:
-        class CudaArch
-        {
-        public:
-            CudaArch();
-
-            bool builtWith(FeatureSet feature_set) const;
-            bool hasPtx(int major, int minor) const;
-            bool hasBin(int major, int minor) const;
-            bool hasEqualOrLessPtx(int major, int minor) const;
-            bool hasEqualOrGreaterPtx(int major, int minor) const;
-            bool hasEqualOrGreaterBin(int major, int minor) const;
-
-        private:
-            static void fromStr(const string& set_as_str, vector<int>& arr);
-
-            vector<int> bin;
-            vector<int> ptx;
-            vector<int> features;
-        };
-
-        const CudaArch cudaArch;
-
-        CudaArch::CudaArch()
+    class CudaArch
+    {
+    public:
+        CudaArch()
         {
             fromStr(CUDA_ARCH_BIN, bin);
             fromStr(CUDA_ARCH_PTX, ptx);
             fromStr(CUDA_ARCH_FEATURES, features);
         }
 
-        bool CudaArch::builtWith(FeatureSet feature_set) const
+        bool builtWith(FeatureSet feature_set) const
         {
             return !features.empty() && (features.back() >= feature_set);
         }
 
-        bool CudaArch::hasPtx(int major, int minor) const
+        bool hasPtx(int major, int minor) const
        {
             return find(ptx.begin(), ptx.end(), major * 10 + minor) != ptx.end();
         }
 
-        bool CudaArch::hasBin(int major, int minor) const
+        bool hasBin(int major, int minor) const
        {
             return find(bin.begin(), bin.end(), major * 10 + minor) != bin.end();
         }
 
-        bool CudaArch::hasEqualOrLessPtx(int major, int minor) const
+        bool hasEqualOrLessPtx(int major, int minor) const
        {
             return !ptx.empty() && (ptx.front() <= major * 10 + minor);
         }
 
-        bool CudaArch::hasEqualOrGreaterPtx(int major, int minor) const
+        bool hasEqualOrGreaterPtx(int major, int minor) const
        {
             return !ptx.empty() && (ptx.back() >= major * 10 + minor);
         }
 
-        bool CudaArch::hasEqualOrGreaterBin(int major, int minor) const
+        bool hasEqualOrGreaterBin(int major, int minor) const
        {
             return !bin.empty() && (bin.back() >= major * 10 + minor);
         }
 
-        void CudaArch::fromStr(const string& set_as_str, vector<int>& arr)
+    private:
+        void fromStr(const string& set_as_str, vector<int>& arr)
         {
             if (set_as_str.find_first_not_of(" ") == string::npos)
                 return;
@@ -501,25 +472,21 @@ namespace
 
             sort(arr.begin(), arr.end());
         }
 
-    class DeviceProps
-    {
-    public:
-        DeviceProps();
-        ~DeviceProps();
-
-        cudaDeviceProp* get(int devID);
-
-    private:
-        std::vector<cudaDeviceProp*> props_;
-    };
-
-    DeviceProps::DeviceProps()
+        vector<int> bin;
+        vector<int> ptx;
+        vector<int> features;
+    };
+
+    class DeviceProps
+    {
+    public:
+        DeviceProps()
         {
             props_.resize(10, 0);
         }
 
-    DeviceProps::~DeviceProps()
+        ~DeviceProps()
         {
             for (size_t i = 0; i < props_.size(); ++i)
             {
@@ -529,7 +496,7 @@ namespace
             props_.clear();
         }
 
-    cudaDeviceProp* DeviceProps::get(int devID)
+        cudaDeviceProp* get(int devID)
         {
             if (devID >= (int) props_.size())
                 props_.resize(devID + 5, 0);
@@ -542,10 +509,92 @@ namespace
 
             return props_[devID];
         }
 
-    DeviceProps deviceProps;
+    private:
+        std::vector<cudaDeviceProp*> props_;
+    };
+
+    DeviceProps deviceProps;
+
+    class CudaDeviceInfoFuncTable: DeviceInfoFuncTable
+    {
+    public:
+        size_t sharedMemPerBlock() const
+        {
+            return deviceProps.get(device_id_)->sharedMemPerBlock;
+        }
+
+        void queryMemory(size_t& _totalMemory, size_t& _freeMemory) const
+        {
+            int prevDeviceID = getDevice();
+            if (prevDeviceID != device_id_)
+                setDevice(device_id_);
+
+            cudaSafeCall( cudaMemGetInfo(&_freeMemory, &_totalMemory) );
+
+            if (prevDeviceID != device_id_)
+                setDevice(prevDeviceID);
+        }
+
+        size_t freeMemory() const
+        {
+            size_t _totalMemory, _freeMemory;
+            queryMemory(_totalMemory, _freeMemory);
+            return _freeMemory;
+        }
+
+        size_t totalMemory() const
+        {
+            size_t _totalMemory, _freeMemory;
+            queryMemory(_totalMemory, _freeMemory);
+            return _totalMemory;
+        }
+
+        bool supports(FeatureSet feature_set) const
+        {
+            int version = majorVersion_ * 10 + minorVersion_;
+            return version >= feature_set;
+        }
+
+        bool isCompatible() const
+        {
+            // Check PTX compatibility
+            if (TargetArchs::hasEqualOrLessPtx(majorVersion_, minorVersion_))
+                return true;
+
+            // Check BIN compatibility
+            for (int i = minorVersion_; i >= 0; --i)
+                if (TargetArchs::hasBin(majorVersion_, i))
+                    return true;
+
+            return false;
+        }
+
+        void query()
+        {
+            const cudaDeviceProp* prop = deviceProps.get(device_id_);
+
+            name_ = prop->name;
+            multi_processor_count_ = prop->multiProcessorCount;
+            majorVersion_ = prop->major;
+            minorVersion_ = prop->minor;
+        }
+
+    private:
+        int device_id_;
+
+        std::string name_;
+        int multi_processor_count_;
+        int majorVersion_;
+        int minorVersion_;
+    };
+
+    class CudaFuncTable : public GpuFuncTable
+    {
+    protected:
+
+        const CudaArch cudaArch;
 
-    int convertSMVer2Cores(int major, int minor)
+        int convertSMVer2Cores(int major, int minor) const
         {
             // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
             typedef struct {
@@ -600,42 +649,42 @@ namespace
             cudaSafeCall( cudaDeviceReset() );
         }
 
-        bool TargetArchs::builtWith(FeatureSet feature_set) const
+        bool builtWith(FeatureSet feature_set) const
         {
             return cudaArch.builtWith(feature_set);
         }
 
-        bool TargetArchs::has(int major, int minor) const
+        bool has(int major, int minor) const
         {
             return hasPtx(major, minor) || hasBin(major, minor);
         }
 
-        bool TargetArchs::hasPtx(int major, int minor) const
+        bool hasPtx(int major, int minor) const
         {
             return cudaArch.hasPtx(major, minor);
         }
 
-        bool TargetArchs::hasBin(int major, int minor) const
+        bool hasBin(int major, int minor) const
         {
             return cudaArch.hasBin(major, minor);
         }
 
-        bool TargetArchs::hasEqualOrLessPtx(int major, int minor) const
+        bool hasEqualOrLessPtx(int major, int minor) const
         {
             return cudaArch.hasEqualOrLessPtx(major, minor);
         }
 
-        bool TargetArchs::hasEqualOrGreater(int major, int minor) const
+        bool hasEqualOrGreater(int major, int minor) const
         {
             return hasEqualOrGreaterPtx(major, minor) || hasEqualOrGreaterBin(major, minor);
         }
 
-        bool TargetArchs::hasEqualOrGreaterPtx(int major, int minor) const
+        bool hasEqualOrGreaterPtx(int major, int minor) const
         {
             return cudaArch.hasEqualOrGreaterPtx(major, minor);
         }
 
-        bool TargetArchs::hasEqualOrGreaterBin(int major, int minor) const
+        bool hasEqualOrGreaterBin(int major, int minor) const
         {
             return cudaArch.hasEqualOrGreaterBin(major, minor);
         }
@@ -664,68 +713,7 @@ namespace
 
             return TargetArchs::builtWith(feature_set) && (version >= feature_set);
         }
 
-        size_t sharedMemPerBlock() const
-        {
-            return deviceProps.get(device_id_)->sharedMemPerBlock;
-        }
-
-        void queryMemory(size_t& _totalMemory, size_t& _freeMemory) const
-        {
-            int prevDeviceID = getDevice();
-            if (prevDeviceID != device_id_)
-                setDevice(device_id_);
-
-            cudaSafeCall( cudaMemGetInfo(&_freeMemory, &_totalMemory) );
-
-            if (prevDeviceID != device_id_)
-                setDevice(prevDeviceID);
-        }
-
-        size_t freeMemory() const
-        {
-            size_t _totalMemory, _freeMemory;
-            queryMemory(_totalMemory, _freeMemory);
-            return _freeMemory;
-        }
-
-        size_t totalMemory() const
-        {
-            size_t _totalMemory, _freeMemory;
-            queryMemory(_totalMemory, _freeMemory);
-            return _totalMemory;
-        }
-
-        bool supports(FeatureSet feature_set) const
-        {
-            int version = majorVersion() * 10 + minorVersion();
-            return version >= feature_set;
-        }
-
-        bool isCompatible() const
-        {
-            // Check PTX compatibility
-            if (TargetArchs::hasEqualOrLessPtx(majorVersion(), minorVersion()))
-                return true;
-
-            // Check BIN compatibility
-            for (int i = minorVersion(); i >= 0; --i)
-                if (TargetArchs::hasBin(majorVersion(), i))
-                    return true;
-
-            return false;
-        }
-
-        void query() const
-        {
-            const cudaDeviceProp* prop = deviceProps.get(device_id_);
-
-            name_ = prop->name;
-            multi_processor_count_ = prop->multiProcessorCount;
-            majorVersion_ = prop->major;
-            minorVersion_ = prop->minor;
-        }
-
         void printCudaDeviceInfo(int device) const
         {
             int count = getCudaEnabledDeviceCount();
@@ -864,16 +852,16 @@ namespace
             typedef void (*func_t)(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream);
             static const func_t funcs[7][4] =
             {
-                /* 8U */ {NppCopyMasked<CV_8U , nppiCopy_8u_C1MR >::call, cv::gpu::details::copyWithMask, NppCopyMasked<CV_8U , nppiCopy_8u_C3MR >::call, NppCopyMasked<CV_8U , nppiCopy_8u_C4MR >::call},
-                /* 8S */ {cv::gpu::details::copyWithMask , cv::gpu::details::copyWithMask, cv::gpu::details::copyWithMask , cv::gpu::details::copyWithMask },
-                /* 16U */ {NppCopyMasked<CV_16U, nppiCopy_16u_C1MR>::call, cv::gpu::details::copyWithMask, NppCopyMasked<CV_16U, nppiCopy_16u_C3MR>::call, NppCopyMasked<CV_16U, nppiCopy_16u_C4MR>::call},
-                /* 16S */ {NppCopyMasked<CV_16S, nppiCopy_16s_C1MR>::call, cv::gpu::details::copyWithMask, NppCopyMasked<CV_16S, nppiCopy_16s_C3MR>::call, NppCopyMasked<CV_16S, nppiCopy_16s_C4MR>::call},
-                /* 32S */ {NppCopyMasked<CV_32S, nppiCopy_32s_C1MR>::call, cv::gpu::details::copyWithMask, NppCopyMasked<CV_32S, nppiCopy_32s_C3MR>::call, NppCopyMasked<CV_32S, nppiCopy_32s_C4MR>::call},
-                /* 32F */ {NppCopyMasked<CV_32F, nppiCopy_32f_C1MR>::call, cv::gpu::details::copyWithMask, NppCopyMasked<CV_32F, nppiCopy_32f_C3MR>::call, NppCopyMasked<CV_32F, nppiCopy_32f_C4MR>::call},
-                /* 64F */ {cv::gpu::details::copyWithMask , cv::gpu::details::copyWithMask, cv::gpu::details::copyWithMask , cv::gpu::details::copyWithMask }
+                /* 8U */ {NppCopyMasked<CV_8U , nppiCopy_8u_C1MR >::call, cv::gpu::device::copyWithMask, NppCopyMasked<CV_8U , nppiCopy_8u_C3MR >::call, NppCopyMasked<CV_8U , nppiCopy_8u_C4MR >::call},
+                /* 8S */ {cv::gpu::device::copyWithMask , cv::gpu::device::copyWithMask, cv::gpu::device::copyWithMask , cv::gpu::device::copyWithMask },
+                /* 16U */ {NppCopyMasked<CV_16U, nppiCopy_16u_C1MR>::call, cv::gpu::device::copyWithMask, NppCopyMasked<CV_16U, nppiCopy_16u_C3MR>::call, NppCopyMasked<CV_16U, nppiCopy_16u_C4MR>::call},
+                /* 16S */ {NppCopyMasked<CV_16S, nppiCopy_16s_C1MR>::call, cv::gpu::device::copyWithMask, NppCopyMasked<CV_16S, nppiCopy_16s_C3MR>::call, NppCopyMasked<CV_16S, nppiCopy_16s_C4MR>::call},
+                /* 32S */ {NppCopyMasked<CV_32S, nppiCopy_32s_C1MR>::call, cv::gpu::device::copyWithMask, NppCopyMasked<CV_32S, nppiCopy_32s_C3MR>::call, NppCopyMasked<CV_32S, nppiCopy_32s_C4MR>::call},
+                /* 32F */ {NppCopyMasked<CV_32F, nppiCopy_32f_C1MR>::call, cv::gpu::device::copyWithMask, NppCopyMasked<CV_32F, nppiCopy_32f_C3MR>::call, NppCopyMasked<CV_32F, nppiCopy_32f_C4MR>::call},
+                /* 64F */ {cv::gpu::device::copyWithMask , cv::gpu::device::copyWithMask, cv::gpu::device::copyWithMask , cv::gpu::device::copyWithMask }
             };
 
-            const func_t func = mask.channels() == src.channels() ? funcs[src.depth()][src.channels() - 1] : cv::gpu::details::copyWithMask;
+            const func_t func = mask.channels() == src.channels() ? funcs[src.depth()][src.channels() - 1] : cv::gpu::device::copyWithMask;
 
             func(src, dst, mask, 0);
         }
@@ -971,7 +959,7 @@ namespace
             func(src, dst);
         }
 
-        void convert(const GpuMat& src, GpuMat& dst, double alpha, double beta) const
+        void convert(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream) const
         {
             CV_Assert(src.depth() <= CV_64F && src.channels() <= 4);
             CV_Assert(dst.depth() <= CV_64F);
@@ -982,10 +970,10 @@ namespace
                 CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
             }
 
-            cv::gpu::device::convertTo(src, dst, alpha, beta);
+            cv::gpu::device::convertTo(src, dst, alpha, beta, stream);
         }
 
-        void setTo(GpuMat& m, Scalar s, const GpuMat& mask) const
+        void setTo(GpuMat& m, Scalar s, const GpuMat& mask, cudaStream_t stream) const
         {
             if (mask.empty())
             {
@@ -1016,7 +1004,7 @@ namespace
                 {NppSet<CV_16S, 1, nppiSet_16s_C1R>::call, NppSet<CV_16S, 2, nppiSet_16s_C2R>::call, cv::gpu::device::setTo , NppSet<CV_16S, 4, nppiSet_16s_C4R>::call},
                 {NppSet<CV_32S, 1, nppiSet_32s_C1R>::call, cv::gpu::device::setTo , cv::gpu::device::setTo , NppSet<CV_32S, 4, nppiSet_32s_C4R>::call},
                 {NppSet<CV_32F, 1, nppiSet_32f_C1R>::call, cv::gpu::device::setTo , cv::gpu::device::setTo , NppSet<CV_32F, 4, nppiSet_32f_C4R>::call},
                 {cv::gpu::device::setTo , cv::gpu::device::setTo , cv::gpu::device::setTo , cv::gpu::device::setTo }
             };
 
             CV_Assert(m.depth() <= CV_64F && m.channels() <= 4);
@@ -1027,7 +1015,10 @@ namespace
                 CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
             }
 
-            funcs[m.depth()][m.channels() - 1](m, s);
+            if (stream)
+                cv::gpu::device::setTo(m, s, stream);
+            else
+                funcs[m.depth()][m.channels() - 1](m, s);
         }
         else
         {
@@ -1051,7 +1042,10 @@ namespace
                 CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
             }
 
-            funcs[m.depth()][m.channels() - 1](m, s, mask);
+            if (stream)
+                cv::gpu::device::setTo(m, s, mask, stream);
+            else
+                funcs[m.depth()][m.channels() - 1](m, s, mask);
         }
     }
 
@@ -1065,5 +1059,5 @@ namespace
             cudaFree(devPtr);
         }
     };
-}
+#endif
 #endif