diff --git a/modules/core/include/opencv2/core/gpumat.hpp b/modules/core/include/opencv2/core/gpumat.hpp
index 1ce35aabf..fda699010 100644
--- a/modules/core/include/opencv2/core/gpumat.hpp
+++ b/modules/core/include/opencv2/core/gpumat.hpp
@@ -50,6 +50,96 @@ namespace cv { namespace gpu
 {
+    //////////////////////////////// Initialization & Info ////////////////////////
+
+    //! This is the only function that does not throw exceptions if the library is compiled without CUDA.
+    CV_EXPORTS int getCudaEnabledDeviceCount();
+
+    //! The functions below throw cv::Exception if the library is compiled without CUDA.
+
+    CV_EXPORTS void setDevice(int device);
+    CV_EXPORTS int getDevice();
+
+    //! Explicitly destroys and cleans up all resources associated with the current device in the current process.
+    //! Any subsequent API call to this device will reinitialize the device.
+    CV_EXPORTS void resetDevice();
+
+    enum FeatureSet
+    {
+        FEATURE_SET_COMPUTE_10 = 10,
+        FEATURE_SET_COMPUTE_11 = 11,
+        FEATURE_SET_COMPUTE_12 = 12,
+        FEATURE_SET_COMPUTE_13 = 13,
+        FEATURE_SET_COMPUTE_20 = 20,
+        FEATURE_SET_COMPUTE_21 = 21,
+        GLOBAL_ATOMICS = FEATURE_SET_COMPUTE_11,
+        SHARED_ATOMICS = FEATURE_SET_COMPUTE_12,
+        NATIVE_DOUBLE = FEATURE_SET_COMPUTE_13
+    };
+
+    // Gives information about what GPU archs this OpenCV GPU module was
+    // compiled for
+    class CV_EXPORTS TargetArchs
+    {
+    public:
+        static bool builtWith(FeatureSet feature_set);
+        static bool has(int major, int minor);
+        static bool hasPtx(int major, int minor);
+        static bool hasBin(int major, int minor);
+        static bool hasEqualOrLessPtx(int major, int minor);
+        static bool hasEqualOrGreater(int major, int minor);
+        static bool hasEqualOrGreaterPtx(int major, int minor);
+        static bool hasEqualOrGreaterBin(int major, int minor);
+    private:
+        TargetArchs();
+    };
+
+    // Gives information about the given GPU
+    class CV_EXPORTS DeviceInfo
+    {
+    public:
+        // Creates DeviceInfo object for the current GPU
+        DeviceInfo() : device_id_(getDevice()) { query(); }
+
+        // Creates DeviceInfo object for the given GPU
+        DeviceInfo(int device_id) : device_id_(device_id) { query(); }
+
+        std::string name() const { return name_; }
+
+        // Return compute capability versions
+        int majorVersion() const { return majorVersion_; }
+        int minorVersion() const { return minorVersion_; }
+
+        int multiProcessorCount() const { return multi_processor_count_; }
+
+        size_t freeMemory() const;
+        size_t totalMemory() const;
+
+        // Checks whether device supports the given feature
+        bool supports(FeatureSet feature_set) const;
+
+        // Checks whether the GPU module can be run on the given device
+        bool isCompatible() const;
+
+        int deviceID() const { return device_id_; }
+
+    private:
+        void query();
+        void queryMemory(size_t& free_memory, size_t& total_memory) const;
+
+        int device_id_;
+
+        std::string name_;
+        int multi_processor_count_;
+        int majorVersion_;
+        int minorVersion_;
+    };
+
+    CV_EXPORTS void printCudaDeviceInfo(int device);
+    CV_EXPORTS void printShortCudaDeviceInfo(int device);
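As a quick orientation for this new public API, here is a minimal usage sketch (illustrative only, not part of the patch; it assumes an OpenCV 2.x build with the gpu module enabled):

    #include <opencv2/gpu/gpu.hpp>
    #include <cstdio>

    int main()
    {
        // The one call that is safe without CUDA support:
        // returns 0 when no device is present, -1 when the driver is missing or too old.
        int count = cv::gpu::getCudaEnabledDeviceCount();
        if (count < 1)
            return 0;

        cv::gpu::setDevice(0);
        cv::gpu::DeviceInfo info; // queries the current device

        std::printf("%s: compute %d.%d, %d multiprocessors\n",
                    info.name().c_str(), info.majorVersion(), info.minorVersion(),
                    info.multiProcessorCount());

        // isCompatible() checks the device against the archs this binary was
        // built for; supports() checks one specific device feature.
        if (!info.isCompatible())
            std::printf("the GPU module was not built for this device\n");
        else if (!info.supports(cv::gpu::NATIVE_DOUBLE))
            std::printf("no native double support (compute capability < 1.3)\n");

        return 0;
    }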
+
+    //////////////////////////////// GpuMat ///////////////////////////////
+
     //! Smart pointer for GPU memory with reference counting. Its interface is mostly similar with cv::Mat.
     class CV_EXPORTS GpuMat
     {
@@ -75,7 +165,7 @@ namespace cv { namespace gpu
         //! creates a matrix header for a part of the bigger matrix
         GpuMat(const GpuMat& m, Range rowRange, Range colRange);
         GpuMat(const GpuMat& m, Rect roi);
-
+        //! builds GpuMat from Mat. Performs a blocking upload to the device.
         explicit GpuMat(const Mat& m);
@@ -84,7 +174,7 @@ namespace cv { namespace gpu
         //! assignment operators
         GpuMat& operator = (const GpuMat& m);
-
+        //! performs a blocking upload of data to GpuMat.
         void upload(const Mat& m);
@@ -225,26 +315,26 @@ namespace cv { namespace gpu
     ////////////////////////////////////////////////////////////////////////
     ////////////////////////////////////////////////////////////////////////

-    inline GpuMat::GpuMat()
-        : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
+    inline GpuMat::GpuMat()
+        : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
     {
     }

-    inline GpuMat::GpuMat(int rows_, int cols_, int type_)
+    inline GpuMat::GpuMat(int rows_, int cols_, int type_)
         : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
     {
         if (rows_ > 0 && cols_ > 0)
             create(rows_, cols_, type_);
     }

-    inline GpuMat::GpuMat(Size size_, int type_)
+    inline GpuMat::GpuMat(Size size_, int type_)
         : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
     {
         if (size_.height > 0 && size_.width > 0)
             create(size_.height, size_.width, type_);
     }

-    inline GpuMat::GpuMat(int rows_, int cols_, int type_, Scalar s_)
+    inline GpuMat::GpuMat(int rows_, int cols_, int type_, Scalar s_)
         : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
     {
         if (rows_ > 0 && cols_ > 0)
@@ -254,7 +344,7 @@ namespace cv { namespace gpu
         }
     }

-    inline GpuMat::GpuMat(Size size_, int type_, Scalar s_)
+    inline GpuMat::GpuMat(Size size_, int type_, Scalar s_)
         : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
     {
         if (size_.height > 0 && size_.width > 0)
@@ -262,11 +352,11 @@ namespace cv { namespace gpu
             create(size_.height, size_.width, type_);
             setTo(s_);
         }
-    }
+    }

-    inline GpuMat::~GpuMat()
-    {
-        release();
+    inline GpuMat::~GpuMat()
+    {
+        release();
     }

     inline GpuMat GpuMat::clone() const
@@ -284,14 +374,14 @@ namespace cv { namespace gpu
         convertTo(m, type);
     }

-    inline size_t GpuMat::step1() const
-    {
-        return step / elemSize1();
+    inline size_t GpuMat::step1() const
+    {
+        return step / elemSize1();
     }

-    inline bool GpuMat::empty() const
-    {
-        return data == 0;
+    inline bool GpuMat::empty() const
+    {
+        return data == 0;
     }

     template <typename _Tp> inline _Tp* GpuMat::ptr(int y)
@@ -304,89 +394,89 @@ namespace cv { namespace gpu
         return (const _Tp*)ptr(y);
     }

-    inline void swap(GpuMat& a, GpuMat& b)
-    {
-        a.swap(b);
+    inline void swap(GpuMat& a, GpuMat& b)
+    {
+        a.swap(b);
     }

-    inline GpuMat GpuMat::row(int y) const
-    {
-        return GpuMat(*this, Range(y, y+1), Range::all());
+    inline GpuMat GpuMat::row(int y) const
+    {
+        return GpuMat(*this, Range(y, y+1), Range::all());
     }

-    inline GpuMat GpuMat::col(int x) const
-    {
-        return GpuMat(*this, Range::all(), Range(x, x+1));
+    inline GpuMat GpuMat::col(int x) const
+    {
+        return GpuMat(*this, Range::all(), Range(x, x+1));
     }

-    inline GpuMat GpuMat::rowRange(int startrow, int endrow) const
-    {
-        return GpuMat(*this, Range(startrow, endrow), Range::all());
+    inline GpuMat GpuMat::rowRange(int startrow, int endrow) const
+    {
+        return GpuMat(*this, Range(startrow, endrow), Range::all());
     }

-    inline GpuMat GpuMat::rowRange(Range r) const
-    {
-        return GpuMat(*this, r, Range::all());
+    inline GpuMat GpuMat::rowRange(Range r) const
+    {
+        return GpuMat(*this, r, Range::all());
     }

-    inline GpuMat GpuMat::colRange(int startcol, int endcol) const
-    {
-        return GpuMat(*this, Range::all(), Range(startcol, endcol));
+    inline GpuMat GpuMat::colRange(int startcol, int endcol) const
+    {
+        return GpuMat(*this, Range::all(), Range(startcol, endcol));
     }

-    inline GpuMat GpuMat::colRange(Range r) const
-    {
-        return GpuMat(*this, Range::all(), r);
+    inline GpuMat GpuMat::colRange(Range r) const
+    {
+        return GpuMat(*this, Range::all(), r);
     }

-    inline void GpuMat::create(Size size_, int type_)
-    {
-        create(size_.height, size_.width, type_);
+    inline void GpuMat::create(Size size_, int type_)
+    {
+        create(size_.height, size_.width, type_);
     }

-    inline GpuMat GpuMat::operator()(Range rowRange, Range colRange) const
-    {
-        return GpuMat(*this, rowRange, colRange);
+    inline GpuMat GpuMat::operator()(Range rowRange, Range colRange) const
+    {
+        return GpuMat(*this, rowRange, colRange);
     }

-    inline GpuMat GpuMat::operator()(Rect roi) const
-    {
-        return GpuMat(*this, roi);
+    inline GpuMat GpuMat::operator()(Rect roi) const
+    {
+        return GpuMat(*this, roi);
     }

-    inline bool GpuMat::isContinuous() const
-    {
-        return (flags & Mat::CONTINUOUS_FLAG) != 0;
+    inline bool GpuMat::isContinuous() const
+    {
+        return (flags & Mat::CONTINUOUS_FLAG) != 0;
     }

-    inline size_t GpuMat::elemSize() const
-    {
-        return CV_ELEM_SIZE(flags);
+    inline size_t GpuMat::elemSize() const
+    {
+        return CV_ELEM_SIZE(flags);
     }

-    inline size_t GpuMat::elemSize1() const
-    {
-        return CV_ELEM_SIZE1(flags);
+    inline size_t GpuMat::elemSize1() const
+    {
+        return CV_ELEM_SIZE1(flags);
     }

-    inline int GpuMat::type() const
-    {
-        return CV_MAT_TYPE(flags);
+    inline int GpuMat::type() const
+    {
+        return CV_MAT_TYPE(flags);
     }

-    inline int GpuMat::depth() const
-    {
-        return CV_MAT_DEPTH(flags);
+    inline int GpuMat::depth() const
+    {
+        return CV_MAT_DEPTH(flags);
    }

-    inline int GpuMat::channels() const
-    {
-        return CV_MAT_CN(flags);
+    inline int GpuMat::channels() const
+    {
+        return CV_MAT_CN(flags);
     }

-    inline Size GpuMat::size() const
-    {
-        return Size(cols, rows);
+    inline Size GpuMat::size() const
+    {
+        return Size(cols, rows);
     }

     inline uchar* GpuMat::ptr(int y)
@@ -407,19 +497,19 @@ namespace cv { namespace gpu
         return *this;
     }

-    template <class T> inline GpuMat::operator DevMem2D_<T>() const
-    {
-        return DevMem2D_<T>(rows, cols, (T*)data, step);
+    template <class T> inline GpuMat::operator DevMem2D_<T>() const
+    {
+        return DevMem2D_<T>(rows, cols, (T*)data, step);
     }

-    template <class T> inline GpuMat::operator PtrStep_<T>() const
-    {
-        return PtrStep_<T>(static_cast< DevMem2D_<T> >(*this));
+    template <class T> inline GpuMat::operator PtrStep_<T>() const
+    {
+        return PtrStep_<T>(static_cast< DevMem2D_<T> >(*this));
     }

-    template <class T> inline GpuMat::operator PtrStep<T>() const
-    {
-        return PtrStep<T>((T*)data, step);
+    template <class T> inline GpuMat::operator PtrStep<T>() const
+    {
+        return PtrStep<T>((T*)data, step);
     }

     inline GpuMat createContinuous(int rows, int cols, int type)
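Since GpuMat mirrors the cv::Mat interface, a short round trip may help readers map the declarations above to typical use (a sketch, not part of the patch; it assumes a CUDA device has already been selected):

    #include <opencv2/gpu/gpu.hpp>

    void roundTrip(const cv::Mat& host) // e.g. a CV_8UC1 image
    {
        cv::gpu::GpuMat d_img(host);                            // blocking upload (Mat constructor)
        cv::gpu::GpuMat d_roi = d_img(cv::Rect(0, 0, 16, 16));  // view into the same device memory

        cv::gpu::GpuMat d_f;
        d_roi.convertTo(d_f, CV_32F);                           // conversion runs on the device

        cv::Mat back;
        d_f.download(back);                                     // blocking download to host
    }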
diff --git a/modules/core/src/gpumat.cpp b/modules/core/src/gpumat.cpp
index 8e69c1864..f194878e1 100644
--- a/modules/core/src/gpumat.cpp
+++ b/modules/core/src/gpumat.cpp
@@ -46,7 +46,8 @@
 #include
 #ifdef HAVE_CUDA
-    #include
+    #include
+    #include
     #include

     #define CUDART_MINIMUM_REQUIRED_VERSION 4010
@@ -65,6 +66,408 @@ using namespace std;
 using namespace cv;
 using namespace cv::gpu;

+//////////////////////////////// Initialization & Info ////////////////////////
+
+namespace
+{
+    // Compares value against the set given as a string. Returns true if
+    // there is at least one element x in the set satisfying the predicate
+    // x cmp value.
+    template <typename Comparer>
+    bool compareToSet(const std::string& set_as_str, int value, Comparer cmp)
+    {
+        if (set_as_str.find_first_not_of(" ") == string::npos)
+            return false;
+
+        std::stringstream stream(set_as_str);
+        int cur_value;
+
+        while (!stream.eof())
+        {
+            stream >> cur_value;
+            if (cmp(cur_value, value))
+                return true;
+        }
+
+        return false;
+    }
+}
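To make the helper concrete: CUDA_ARCH_FEATURES, CUDA_ARCH_PTX and CUDA_ARCH_BIN are build-time macros holding space-separated two-digit compute capabilities; the exact strings depend on the CMake configuration. A hypothetical illustration, assuming the module was built with CUDA_ARCH_BIN = "11 13 20":

    compareToSet("11 13 20", 13, std::equal_to<int>());      // true : hasBin(1, 3)
    compareToSet("11 13 20", 21, std::greater_equal<int>()); // false: no arch >= 2.1 was built
    compareToSet("11 13 20", 12, std::less_equal<int>());    // true : an arch <= 1.2 exists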
+bool cv::gpu::TargetArchs::builtWith(cv::gpu::FeatureSet feature_set)
+{
+#ifdef HAVE_CUDA
+    return ::compareToSet(CUDA_ARCH_FEATURES, feature_set, std::greater_equal<int>());
+#else
+    (void)feature_set;
+    return false;
+#endif
+}
+
+bool cv::gpu::TargetArchs::has(int major, int minor)
+{
+    return hasPtx(major, minor) || hasBin(major, minor);
+}
+
+bool cv::gpu::TargetArchs::hasPtx(int major, int minor)
+{
+#ifdef HAVE_CUDA
+    return ::compareToSet(CUDA_ARCH_PTX, major * 10 + minor, std::equal_to<int>());
+#else
+    (void)major;
+    (void)minor;
+    return false;
+#endif
+}
+
+bool cv::gpu::TargetArchs::hasBin(int major, int minor)
+{
+#if defined (HAVE_CUDA)
+    return ::compareToSet(CUDA_ARCH_BIN, major * 10 + minor, std::equal_to<int>());
+#else
+    (void)major;
+    (void)minor;
+    return false;
+#endif
+}
+
+bool cv::gpu::TargetArchs::hasEqualOrLessPtx(int major, int minor)
+{
+#ifdef HAVE_CUDA
+    return ::compareToSet(CUDA_ARCH_PTX, major * 10 + minor,
+                     std::less_equal<int>());
+#else
+    (void)major;
+    (void)minor;
+    return false;
+#endif
+}
+
+bool cv::gpu::TargetArchs::hasEqualOrGreater(int major, int minor)
+{
+    return hasEqualOrGreaterPtx(major, minor) ||
+           hasEqualOrGreaterBin(major, minor);
+}
+
+bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int major, int minor)
+{
+#ifdef HAVE_CUDA
+    return ::compareToSet(CUDA_ARCH_PTX, major * 10 + minor,
+                     std::greater_equal<int>());
+#else
+    (void)major;
+    (void)minor;
+    return false;
+#endif
+}
+
+bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int major, int minor)
+{
+#ifdef HAVE_CUDA
+    return ::compareToSet(CUDA_ARCH_BIN, major * 10 + minor,
+                     std::greater_equal<int>());
+#else
+    (void)major;
+    (void)minor;
+    return false;
+#endif
+}
+
+#ifndef HAVE_CUDA
+
+#define throw_nogpu CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support")
+
+int cv::gpu::getCudaEnabledDeviceCount() { return 0; }
+
+void cv::gpu::setDevice(int) { throw_nogpu; }
+int cv::gpu::getDevice() { throw_nogpu; return 0; }
+
+void cv::gpu::resetDevice() { throw_nogpu; }
+
+size_t cv::gpu::DeviceInfo::freeMemory() const { throw_nogpu; return 0; }
+size_t cv::gpu::DeviceInfo::totalMemory() const { throw_nogpu; return 0; }
+
+bool cv::gpu::DeviceInfo::supports(cv::gpu::FeatureSet) const { throw_nogpu; return false; }
+
+bool cv::gpu::DeviceInfo::isCompatible() const { throw_nogpu; return false; }
+
+void cv::gpu::DeviceInfo::query() { throw_nogpu; }
+void cv::gpu::DeviceInfo::queryMemory(size_t&, size_t&) const { throw_nogpu; }
+
+void cv::gpu::printCudaDeviceInfo(int) { throw_nogpu; }
+void cv::gpu::printShortCudaDeviceInfo(int) { throw_nogpu; }
+
+#undef throw_nogpu
+
+#else // HAVE_CUDA
+
+namespace
+{
+#if defined(__GNUC__)
+    #define cudaSafeCall(expr)  ___cudaSafeCall(expr, __FILE__, __LINE__, __func__)
+    #define nppSafeCall(expr)  ___nppSafeCall(expr, __FILE__, __LINE__, __func__)
+#else /* defined(__CUDACC__) || defined(__MSVC__) */
+    #define cudaSafeCall(expr)  ___cudaSafeCall(expr, __FILE__, __LINE__)
+    #define nppSafeCall(expr)  ___nppSafeCall(expr, __FILE__, __LINE__)
+#endif
+
+    inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
+    {
+        if (cudaSuccess != err)
+            cv::gpu::error(cudaGetErrorString(err), file, line, func);
+    }
+
+    inline void ___nppSafeCall(int err, const char *file, const int line, const char *func = "")
+    {
+        if (err < 0)
+        {
+            std::ostringstream msg;
+            msg << "NPP API Call Error: " << err;
+            cv::gpu::error(msg.str().c_str(), file, line, func);
+        }
+    }
+}
+
+int cv::gpu::getCudaEnabledDeviceCount()
+{
+    int count;
+    cudaError_t error = cudaGetDeviceCount( &count );
+
+    if (error == cudaErrorInsufficientDriver)
+        return -1;
+
+    if (error == cudaErrorNoDevice)
+        return 0;
+
+    cudaSafeCall(error);
+    return count;
+}
+
+void cv::gpu::setDevice(int device)
+{
+    cudaSafeCall( cudaSetDevice( device ) );
+}
+
+int cv::gpu::getDevice()
+{
+    int device;
+    cudaSafeCall( cudaGetDevice( &device ) );
+    return device;
+}
+
+void cv::gpu::resetDevice()
+{
+    cudaSafeCall( cudaDeviceReset() );
+}
+
+size_t cv::gpu::DeviceInfo::freeMemory() const
+{
+    size_t free_memory, total_memory;
+    queryMemory(free_memory, total_memory);
+    return free_memory;
+}
+
+size_t cv::gpu::DeviceInfo::totalMemory() const
+{
+    size_t free_memory, total_memory;
+    queryMemory(free_memory, total_memory);
+    return total_memory;
+}
+
+bool cv::gpu::DeviceInfo::supports(cv::gpu::FeatureSet feature_set) const
+{
+    int version = majorVersion() * 10 + minorVersion();
+    return version >= feature_set;
+}
+
+bool cv::gpu::DeviceInfo::isCompatible() const
+{
+    // Check PTX compatibility
+    if (TargetArchs::hasEqualOrLessPtx(majorVersion(), minorVersion()))
+        return true;
+
+    // Check BIN compatibility
+    for (int i = minorVersion(); i >= 0; --i)
+        if (TargetArchs::hasBin(majorVersion(), i))
+            return true;
+
+    return false;
+}
+
+void cv::gpu::DeviceInfo::query()
+{
+    cudaDeviceProp prop;
+    cudaSafeCall(cudaGetDeviceProperties(&prop, device_id_));
+    name_ = prop.name;
+    multi_processor_count_ = prop.multiProcessorCount;
+    majorVersion_ = prop.major;
+    minorVersion_ = prop.minor;
+}
+
+void cv::gpu::DeviceInfo::queryMemory(size_t& free_memory, size_t& total_memory) const
+{
+    int prev_device_id = getDevice();
+    if (prev_device_id != device_id_)
+        setDevice(device_id_);
+
+    cudaSafeCall(cudaMemGetInfo(&free_memory, &total_memory));
+
+    if (prev_device_id != device_id_)
+        setDevice(prev_device_id);
+}
+
+namespace
+{
+    template <class T> void getCudaAttribute(T *attribute, CUdevice_attribute device_attribute, int device)
+    {
+        *attribute = T();
+        CUresult error = CUDA_SUCCESS;// = cuDeviceGetAttribute( attribute, device_attribute, device ); why link errors under ubuntu??
+        if( CUDA_SUCCESS == error )
+            return;
+
+        printf("Driver API error = %04d\n", error);
+        cv::gpu::error("driver API error", __FILE__, __LINE__);
+    }
+
+    int convertSMVer2Cores(int major, int minor)
+    {
+        // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM)
+        typedef struct {
+            int SM; // 0xMm (hexadecimal notation), M = SM Major version, and m = SM minor version
+            int Cores;
+        } SMtoCores;
+
+        SMtoCores gpuArchCoresPerSM[] = { { 0x10, 8 }, { 0x11, 8 }, { 0x12, 8 }, { 0x13, 8 }, { 0x20, 32 }, { 0x21, 48 }, { -1, -1 } };
+
+        int index = 0;
+        while (gpuArchCoresPerSM[index].SM != -1)
+        {
+            if (gpuArchCoresPerSM[index].SM == ((major << 4) + minor))
+                return gpuArchCoresPerSM[index].Cores;
+            index++;
+        }
+        printf("MapSMtoCores undefined SM version %d.%d!\n", major, minor);
+        return -1;
+    }
+}
+
+void cv::gpu::printCudaDeviceInfo(int device)
+{
+    int count = getCudaEnabledDeviceCount();
+    bool valid = (device >= 0) && (device < count);
+
+    int beg = valid ? device : 0;
+    int end = valid ?
device+1 : count; + + printf("*** CUDA Device Query (Runtime API) version (CUDART static linking) *** \n\n"); + printf("Device count: %d\n", count); + + int driverVersion = 0, runtimeVersion = 0; + cudaSafeCall( cudaDriverGetVersion(&driverVersion) ); + cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) ); + + const char *computeMode[] = { + "Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)", + "Exclusive (only one host thread in one process is able to use ::cudaSetDevice() with this device)", + "Prohibited (no host thread can use ::cudaSetDevice() with this device)", + "Exclusive Process (many threads in one process is able to use ::cudaSetDevice() with this device)", + "Unknown", + NULL + }; + + for(int dev = beg; dev < end; ++dev) + { + cudaDeviceProp prop; + cudaSafeCall( cudaGetDeviceProperties(&prop, dev) ); + + printf("\nDevice %d: \"%s\"\n", dev, prop.name); + printf(" CUDA Driver Version / Runtime Version %d.%d / %d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100); + printf(" CUDA Capability Major/Minor version number: %d.%d\n", prop.major, prop.minor); + printf(" Total amount of global memory: %.0f MBytes (%llu bytes)\n", (float)prop.totalGlobalMem/1048576.0f, (unsigned long long) prop.totalGlobalMem); + printf(" (%2d) Multiprocessors x (%2d) CUDA Cores/MP: %d CUDA Cores\n", + prop.multiProcessorCount, convertSMVer2Cores(prop.major, prop.minor), + convertSMVer2Cores(prop.major, prop.minor) * prop.multiProcessorCount); + printf(" GPU Clock Speed: %.2f GHz\n", prop.clockRate * 1e-6f); + + // This is not available in the CUDA Runtime API, so we make the necessary calls the driver API to support this for output + int memoryClock, memBusWidth, L2CacheSize; + getCudaAttribute( &memoryClock, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, dev ); + getCudaAttribute( &memBusWidth, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, dev ); + getCudaAttribute( &L2CacheSize, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, dev ); + + printf(" Memory Clock rate: %.2f Mhz\n", memoryClock * 1e-3f); + printf(" Memory Bus Width: %d-bit\n", memBusWidth); + if (L2CacheSize) + printf(" L2 Cache Size: %d bytes\n", L2CacheSize); + + printf(" Max Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d,%d), 3D=(%d,%d,%d)\n", + prop.maxTexture1D, prop.maxTexture2D[0], prop.maxTexture2D[1], + prop.maxTexture3D[0], prop.maxTexture3D[1], prop.maxTexture3D[2]); + printf(" Max Layered Texture Size (dim) x layers 1D=(%d) x %d, 2D=(%d,%d) x %d\n", + prop.maxTexture1DLayered[0], prop.maxTexture1DLayered[1], + prop.maxTexture2DLayered[0], prop.maxTexture2DLayered[1], prop.maxTexture2DLayered[2]); + + printf(" Total amount of constant memory: %u bytes\n", (int)prop.totalConstMem); + printf(" Total amount of shared memory per block: %u bytes\n", (int)prop.sharedMemPerBlock); + printf(" Total number of registers available per block: %d\n", prop.regsPerBlock); + printf(" Warp size: %d\n", prop.warpSize); + printf(" Maximum number of threads per block: %d\n", prop.maxThreadsPerBlock); + printf(" Maximum sizes of each dimension of a block: %d x %d x %d\n", prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2]); + printf(" Maximum sizes of each dimension of a grid: %d x %d x %d\n", prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2]); + printf(" Maximum memory pitch: %u bytes\n", (int)prop.memPitch); + printf(" Texture alignment: %u bytes\n", (int)prop.textureAlignment); + + printf(" Concurrent copy and execution: %s with %d copy engine(s)\n", 
(prop.deviceOverlap ? "Yes" : "No"), prop.asyncEngineCount); + printf(" Run time limit on kernels: %s\n", prop.kernelExecTimeoutEnabled ? "Yes" : "No"); + printf(" Integrated GPU sharing Host Memory: %s\n", prop.integrated ? "Yes" : "No"); + printf(" Support host page-locked memory mapping: %s\n", prop.canMapHostMemory ? "Yes" : "No"); + + printf(" Concurrent kernel execution: %s\n", prop.concurrentKernels ? "Yes" : "No"); + printf(" Alignment requirement for Surfaces: %s\n", prop.surfaceAlignment ? "Yes" : "No"); + printf(" Device has ECC support enabled: %s\n", prop.ECCEnabled ? "Yes" : "No"); + printf(" Device is using TCC driver mode: %s\n", prop.tccDriver ? "Yes" : "No"); + printf(" Device supports Unified Addressing (UVA): %s\n", prop.unifiedAddressing ? "Yes" : "No"); + printf(" Device PCI Bus ID / PCI location ID: %d / %d\n", prop.pciBusID, prop.pciDeviceID ); + printf(" Compute Mode:\n"); + printf(" %s \n", computeMode[prop.computeMode]); + } + + printf("\n"); + printf("deviceQuery, CUDA Driver = CUDART"); + printf(", CUDA Driver Version = %d.%d", driverVersion / 1000, driverVersion % 100); + printf(", CUDA Runtime Version = %d.%d", runtimeVersion/1000, runtimeVersion%100); + printf(", NumDevs = %d\n\n", count); + fflush(stdout); +} + +void cv::gpu::printShortCudaDeviceInfo(int device) +{ + int count = getCudaEnabledDeviceCount(); + bool valid = (device >= 0) && (device < count); + + int beg = valid ? device : 0; + int end = valid ? device+1 : count; + + int driverVersion = 0, runtimeVersion = 0; + cudaSafeCall( cudaDriverGetVersion(&driverVersion) ); + cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) ); + + for(int dev = beg; dev < end; ++dev) + { + cudaDeviceProp prop; + cudaSafeCall( cudaGetDeviceProperties(&prop, dev) ); + + const char *arch_str = prop.major < 2 ? 
" (not Fermi)" : ""; + printf("Device %d: \"%s\" %.0fMb", dev, prop.name, (float)prop.totalGlobalMem/1048576.0f); + printf(", sm_%d%d%s, %d cores", prop.major, prop.minor, arch_str, convertSMVer2Cores(prop.major, prop.minor) * prop.multiProcessorCount); + printf(", Driver/Runtime ver.%d.%d/%d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100); + } + fflush(stdout); +} + +#endif // HAVE_CUDA + +//////////////////////////////// GpuMat /////////////////////////////// + cv::gpu::GpuMat::GpuMat(const GpuMat& m) : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend) { @@ -326,25 +729,23 @@ namespace #ifndef HAVE_CUDA -#define throw_nocuda CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support") - namespace { class EmptyFuncTable : public GpuFuncTable { public: - void copy(const Mat&, GpuMat&) const { throw_nocuda; } - void copy(const GpuMat&, Mat&) const { throw_nocuda; } - void copy(const GpuMat&, GpuMat&) const { throw_nocuda; } + void copy(const Mat&, GpuMat&) const { CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support"); } + void copy(const GpuMat&, Mat&) const { CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support"); } + void copy(const GpuMat&, GpuMat&) const { CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support"); } - void copyWithMask(const GpuMat&, GpuMat&, const GpuMat&) const { throw_nocuda; } + void copyWithMask(const GpuMat&, GpuMat&, const GpuMat&) const { CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support"); } - void convert(const GpuMat&, GpuMat&) const { throw_nocuda; } - void convert(const GpuMat&, GpuMat&, double, double) const { throw_nocuda; } + void convert(const GpuMat&, GpuMat&) const { CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support"); } + void convert(const GpuMat&, GpuMat&, double, double) const { CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support"); } - void setTo(GpuMat&, Scalar, const GpuMat&) const { throw_nocuda; } + void setTo(GpuMat&, Scalar, const GpuMat&) const { CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support"); } - void mallocPitch(void**, size_t*, size_t, size_t) const { throw_nocuda; } + void mallocPitch(void**, size_t*, size_t, size_t) const { CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support"); } void free(void*) const {} }; @@ -370,33 +771,6 @@ namespace cv { namespace gpu { namespace device void convert_gpu(DevMem2Db src, int sdepth, DevMem2Db dst, int ddepth, double alpha, double beta, cudaStream_t stream); }}} -namespace -{ -#if defined(__GNUC__) - #define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, __func__) - #define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__, __func__) -#else /* defined(__CUDACC__) || defined(__MSVC__) */ - #define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__) - #define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__) -#endif - - inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "") - { - if (cudaSuccess != err) - cv::gpu::error(cudaGetErrorString(err), file, line, func); - } - - inline void ___nppSafeCall(int err, const char *file, const int line, const char *func = "") - { - if (err < 0) - { - std::ostringstream msg; - msg << "NPP API Call Error: " << err; - cv::gpu::error(msg.str().c_str(), file, 
line, func); - } - } -} - namespace { template void kernelSetCaller(GpuMat& src, Scalar s, cudaStream_t stream) @@ -502,7 +876,7 @@ namespace typedef typename NPPTypeTraits::npp_type src_t; typedef typename NPPTypeTraits::npp_type dst_t; - static void cvt(const GpuMat& src, GpuMat& dst) + static void call(const GpuMat& src, GpuMat& dst) { NppiSize sz; sz.width = src.cols; @@ -517,7 +891,7 @@ namespace { typedef typename NPPTypeTraits::npp_type dst_t; - static void cvt(const GpuMat& src, GpuMat& dst) + static void call(const GpuMat& src, GpuMat& dst) { NppiSize sz; sz.width = src.cols; @@ -557,7 +931,7 @@ namespace { typedef typename NPPTypeTraits::npp_type src_t; - static void set(GpuMat& src, Scalar s) + static void call(GpuMat& src, Scalar s) { NppiSize sz; sz.width = src.cols; @@ -574,7 +948,7 @@ namespace { typedef typename NPPTypeTraits::npp_type src_t; - static void set(GpuMat& src, Scalar s) + static void call(GpuMat& src, Scalar s) { NppiSize sz; sz.width = src.cols; @@ -605,7 +979,7 @@ namespace { typedef typename NPPTypeTraits::npp_type src_t; - static void set(GpuMat& src, Scalar s, const GpuMat& mask) + static void call(GpuMat& src, Scalar s, const GpuMat& mask) { NppiSize sz; sz.width = src.cols; @@ -622,7 +996,7 @@ namespace { typedef typename NPPTypeTraits::npp_type src_t; - static void set(GpuMat& src, Scalar s, const GpuMat& mask) + static void call(GpuMat& src, Scalar s, const GpuMat& mask) { NppiSize sz; sz.width = src.cols; @@ -650,7 +1024,7 @@ namespace { typedef typename NPPTypeTraits::npp_type src_t; - static void copyMasked(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t /*stream*/) + static void call(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t /*stream*/) { NppiSize sz; sz.width = src.cols; @@ -683,99 +1057,114 @@ namespace void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask) const { + CV_Assert(src.depth() <= CV_64F && src.channels() <= 4); CV_Assert(src.size() == dst.size() && src.type() == dst.type()); CV_Assert(src.size() == mask.size() && mask.depth() == CV_8U && (mask.channels() == 1 || mask.channels() == src.channels())); - typedef void (*caller_t)(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream); - - static const caller_t callers[7][4] = + if (src.depth() == CV_64F) { - /* 8U */ {NppCopyMasked::copyMasked, cv::gpu::copyWithMask, NppCopyMasked::copyMasked, NppCopyMasked::copyMasked}, - /* 8S */ {cv::gpu::copyWithMask, cv::gpu::copyWithMask, cv::gpu::copyWithMask, cv::gpu::copyWithMask}, - /* 16U */ {NppCopyMasked::copyMasked, cv::gpu::copyWithMask, NppCopyMasked::copyMasked, NppCopyMasked::copyMasked}, - /* 16S */ {NppCopyMasked::copyMasked, cv::gpu::copyWithMask, NppCopyMasked::copyMasked, NppCopyMasked::copyMasked}, - /* 32S */ {NppCopyMasked::copyMasked, cv::gpu::copyWithMask, NppCopyMasked::copyMasked, NppCopyMasked::copyMasked}, - /* 32F */ {NppCopyMasked::copyMasked, cv::gpu::copyWithMask, NppCopyMasked::copyMasked, NppCopyMasked::copyMasked}, - /* 64F */ {cv::gpu::copyWithMask, cv::gpu::copyWithMask, cv::gpu::copyWithMask, cv::gpu::copyWithMask} + if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE)) + CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); + } + + typedef void (*func_t)(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream); + static const func_t funcs[7][4] = + { + /* 8U */ {NppCopyMasked::call, cv::gpu::copyWithMask, NppCopyMasked::call, NppCopyMasked::call}, + /* 8S */ 
{cv::gpu::copyWithMask , cv::gpu::copyWithMask, cv::gpu::copyWithMask , cv::gpu::copyWithMask }, + /* 16U */ {NppCopyMasked::call, cv::gpu::copyWithMask, NppCopyMasked::call, NppCopyMasked::call}, + /* 16S */ {NppCopyMasked::call, cv::gpu::copyWithMask, NppCopyMasked::call, NppCopyMasked::call}, + /* 32S */ {NppCopyMasked::call, cv::gpu::copyWithMask, NppCopyMasked::call, NppCopyMasked::call}, + /* 32F */ {NppCopyMasked::call, cv::gpu::copyWithMask, NppCopyMasked::call, NppCopyMasked::call}, + /* 64F */ {cv::gpu::copyWithMask , cv::gpu::copyWithMask, cv::gpu::copyWithMask , cv::gpu::copyWithMask } }; - caller_t func = mask.channels() == src.channels() ? callers[src.depth()][src.channels()] : cv::gpu::copyWithMask; - CV_DbgAssert(func != 0); + const func_t func = mask.channels() == src.channels() ? funcs[src.depth()][src.channels() - 1] : cv::gpu::copyWithMask; func(src, dst, mask, 0); } void convert(const GpuMat& src, GpuMat& dst) const { - typedef void (*caller_t)(const GpuMat& src, GpuMat& dst); - static const caller_t callers[7][7][7] = + typedef void (*func_t)(const GpuMat& src, GpuMat& dst); + static const func_t funcs[7][7][4] = { { /* 8U -> 8U */ {0, 0, 0, 0}, - /* 8U -> 8S */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo}, - /* 8U -> 16U */ {NppCvt::cvt,cv::gpu::convertTo,cv::gpu::convertTo,NppCvt::cvt}, - /* 8U -> 16S */ {NppCvt::cvt,cv::gpu::convertTo,cv::gpu::convertTo,NppCvt::cvt}, - /* 8U -> 32S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}, - /* 8U -> 32F */ {NppCvt::cvt,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}, - /* 8U -> 64F */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo} + /* 8U -> 8S */ {cv::gpu::convertTo , cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo }, + /* 8U -> 16U */ {NppCvt::call, cv::gpu::convertTo, cv::gpu::convertTo, NppCvt::call}, + /* 8U -> 16S */ {NppCvt::call, cv::gpu::convertTo, cv::gpu::convertTo, NppCvt::call}, + /* 8U -> 32S */ {cv::gpu::convertTo , cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo }, + /* 8U -> 32F */ {NppCvt::call, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo }, + /* 8U -> 64F */ {cv::gpu::convertTo , cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo } }, { - /* 8S -> 8U */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}, + /* 8S -> 8U */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo}, /* 8S -> 8S */ {0,0,0,0}, - /* 8S -> 16U */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}, - /* 8S -> 16S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}, - /* 8S -> 32S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}, - /* 8S -> 32F */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}, - /* 8S -> 64F */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo} + /* 8S -> 16U */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo}, + /* 8S -> 16S */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo}, + /* 8S -> 32S */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo}, + /* 8S -> 32F */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo}, + /* 8S -> 64F */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo} }, { - /* 16U -> 8U */ 
{NppCvt::cvt,cv::gpu::convertTo,cv::gpu::convertTo,NppCvt::cvt}, - /* 16U -> 8S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}, + /* 16U -> 8U */ {NppCvt::call, cv::gpu::convertTo, cv::gpu::convertTo, NppCvt::call}, + /* 16U -> 8S */ {cv::gpu::convertTo , cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo }, /* 16U -> 16U */ {0,0,0,0}, - /* 16U -> 16S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}, - /* 16U -> 32S */ {NppCvt::cvt,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}, - /* 16U -> 32F */ {NppCvt::cvt,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}, - /* 16U -> 64F */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo} + /* 16U -> 16S */ {cv::gpu::convertTo , cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo }, + /* 16U -> 32S */ {NppCvt::call, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo }, + /* 16U -> 32F */ {NppCvt::call, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo }, + /* 16U -> 64F */ {cv::gpu::convertTo , cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo } }, { - /* 16S -> 8U */ {NppCvt::cvt,cv::gpu::convertTo,cv::gpu::convertTo,NppCvt::cvt}, - /* 16S -> 8S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}, - /* 16S -> 16U */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}, + /* 16S -> 8U */ {NppCvt::call, cv::gpu::convertTo, cv::gpu::convertTo, NppCvt::call}, + /* 16S -> 8S */ {cv::gpu::convertTo , cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo }, + /* 16S -> 16U */ {cv::gpu::convertTo , cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo }, /* 16S -> 16S */ {0,0,0,0}, - /* 16S -> 32S */ {NppCvt::cvt,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}, - /* 16S -> 32F */ {NppCvt::cvt,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}, - /* 16S -> 64F */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo} + /* 16S -> 32S */ {NppCvt::call, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo }, + /* 16S -> 32F */ {NppCvt::call, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo }, + /* 16S -> 64F */ {cv::gpu::convertTo , cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo } }, { - /* 32S -> 8U */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}, - /* 32S -> 8S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}, - /* 32S -> 16U */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}, - /* 32S -> 16S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}, + /* 32S -> 8U */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo}, + /* 32S -> 8S */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo}, + /* 32S -> 16U */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo}, + /* 32S -> 16S */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo}, /* 32S -> 32S */ {0,0,0,0}, - /* 32S -> 32F */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}, - /* 32S -> 64F */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo} + /* 32S -> 32F */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo}, + /* 32S -> 64F */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo} }, 
{ - /* 32F -> 8U */ {NppCvt::cvt,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}, - /* 32F -> 8S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}, - /* 32F -> 16U */ {NppCvt::cvt,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}, - /* 32F -> 16S */ {NppCvt::cvt,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}, - /* 32F -> 32S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}, + /* 32F -> 8U */ {NppCvt::call, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo}, + /* 32F -> 8S */ {cv::gpu::convertTo , cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo}, + /* 32F -> 16U */ {NppCvt::call, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo}, + /* 32F -> 16S */ {NppCvt::call, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo}, + /* 32F -> 32S */ {cv::gpu::convertTo , cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo}, /* 32F -> 32F */ {0,0,0,0}, - /* 32F -> 64F */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo} + /* 32F -> 64F */ {cv::gpu::convertTo , cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo} }, { - /* 64F -> 8U */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}, - /* 64F -> 8S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}, - /* 64F -> 16U */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}, - /* 64F -> 16S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}, - /* 64F -> 32S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}, - /* 64F -> 32F */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}, + /* 64F -> 8U */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo}, + /* 64F -> 8S */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo}, + /* 64F -> 16U */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo}, + /* 64F -> 16S */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo}, + /* 64F -> 32S */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo}, + /* 64F -> 32F */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo}, /* 64F -> 64F */ {0,0,0,0} } }; - caller_t func = callers[src.depth()][dst.depth()][src.channels() - 1]; + CV_Assert(src.depth() <= CV_64F && src.channels() <= 4); + CV_Assert(dst.depth() <= CV_64F); + CV_Assert(src.size() == dst.size() && src.channels() == dst.channels()); + + if (src.depth() == CV_64F || dst.depth() == CV_64F) + { + if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE)) + CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); + } + + const func_t func = funcs[src.depth()][dst.depth()][src.channels() - 1]; CV_DbgAssert(func != 0); func(src, dst); @@ -783,6 +1172,15 @@ namespace void convert(const GpuMat& src, GpuMat& dst, double alpha, double beta) const { + CV_Assert(src.depth() <= CV_64F && src.channels() <= 4); + CV_Assert(dst.depth() <= CV_64F); + + if (src.depth() == CV_64F || dst.depth() == CV_64F) + { + if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE)) + CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); + } + cv::gpu::convertTo(src, dst, alpha, beta); } @@ -812,36 +1210,51 @@ namespace } } - typedef void (*caller_t)(GpuMat& src, 
Scalar s); - static const caller_t callers[7][4] = + typedef void (*func_t)(GpuMat& src, Scalar s); + static const func_t funcs[7][4] = { - {NppSet::set, cv::gpu::setTo, cv::gpu::setTo, NppSet::set}, - {NppSet::set, NppSet::set, NppSet::set, NppSet::set}, - {NppSet::set, NppSet::set, cv::gpu::setTo, NppSet::set}, - {NppSet::set, NppSet::set, cv::gpu::setTo, NppSet::set}, - {NppSet::set, cv::gpu::setTo, cv::gpu::setTo, NppSet::set}, - {NppSet::set, cv::gpu::setTo, cv::gpu::setTo, NppSet::set}, - {cv::gpu::setTo, cv::gpu::setTo, cv::gpu::setTo, cv::gpu::setTo} + {NppSet::call, cv::gpu::setTo , cv::gpu::setTo , NppSet::call}, + {NppSet::call, NppSet::call, NppSet::call, NppSet::call}, + {NppSet::call, NppSet::call, cv::gpu::setTo , NppSet::call}, + {NppSet::call, NppSet::call, cv::gpu::setTo , NppSet::call}, + {NppSet::call, cv::gpu::setTo , cv::gpu::setTo , NppSet::call}, + {NppSet::call, cv::gpu::setTo , cv::gpu::setTo , NppSet::call}, + {cv::gpu::setTo , cv::gpu::setTo , cv::gpu::setTo , cv::gpu::setTo } }; - callers[m.depth()][m.channels() - 1](m, s); + CV_Assert(m.depth() <= CV_64F && m.channels() <= 4); + + if (m.depth() == CV_64F) + { + if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE)) + CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); + } + + funcs[m.depth()][m.channels() - 1](m, s); } else { - typedef void (*caller_t)(GpuMat& src, Scalar s, const GpuMat& mask); - - static const caller_t callers[7][4] = + typedef void (*func_t)(GpuMat& src, Scalar s, const GpuMat& mask); + static const func_t funcs[7][4] = { - {NppSetMask::set, cv::gpu::setTo, cv::gpu::setTo, NppSetMask::set}, - {cv::gpu::setTo, cv::gpu::setTo, cv::gpu::setTo, cv::gpu::setTo}, - {NppSetMask::set, cv::gpu::setTo, cv::gpu::setTo, NppSetMask::set}, - {NppSetMask::set, cv::gpu::setTo, cv::gpu::setTo, NppSetMask::set}, - {NppSetMask::set, cv::gpu::setTo, cv::gpu::setTo, NppSetMask::set}, - {NppSetMask::set, cv::gpu::setTo, cv::gpu::setTo, NppSetMask::set}, - {cv::gpu::setTo, cv::gpu::setTo, cv::gpu::setTo, cv::gpu::setTo} + {NppSetMask::call, cv::gpu::setTo, cv::gpu::setTo, NppSetMask::call}, + {cv::gpu::setTo , cv::gpu::setTo, cv::gpu::setTo, cv::gpu::setTo }, + {NppSetMask::call, cv::gpu::setTo, cv::gpu::setTo, NppSetMask::call}, + {NppSetMask::call, cv::gpu::setTo, cv::gpu::setTo, NppSetMask::call}, + {NppSetMask::call, cv::gpu::setTo, cv::gpu::setTo, NppSetMask::call}, + {NppSetMask::call, cv::gpu::setTo, cv::gpu::setTo, NppSetMask::call}, + {cv::gpu::setTo , cv::gpu::setTo, cv::gpu::setTo, cv::gpu::setTo } }; - callers[m.depth()][m.channels() - 1](m, s, mask); + CV_Assert(m.depth() <= CV_64F && m.channels() <= 4); + + if (m.depth() == CV_64F) + { + if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE)) + CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); + } + + funcs[m.depth()][m.channels() - 1](m, s, mask); } } diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index 08c6be5b7..d02abefab 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -54,94 +54,6 @@ namespace cv { namespace gpu { -//////////////////////////////// Initialization & Info //////////////////////// - -//! This is the only function that do not throw exceptions if the library is compiled without Cuda. -CV_EXPORTS int getCudaEnabledDeviceCount(); - -//! Functions below throw cv::Expception if the library is compiled without Cuda. 
- -CV_EXPORTS void setDevice(int device); -CV_EXPORTS int getDevice(); - -//! Explicitly destroys and cleans up all resources associated with the current device in the current process. -//! Any subsequent API call to this device will reinitialize the device. -CV_EXPORTS void resetDevice(); - -enum FeatureSet -{ - FEATURE_SET_COMPUTE_10 = 10, - FEATURE_SET_COMPUTE_11 = 11, - FEATURE_SET_COMPUTE_12 = 12, - FEATURE_SET_COMPUTE_13 = 13, - FEATURE_SET_COMPUTE_20 = 20, - FEATURE_SET_COMPUTE_21 = 21, - GLOBAL_ATOMICS = FEATURE_SET_COMPUTE_11, - SHARED_ATOMICS = FEATURE_SET_COMPUTE_12, - NATIVE_DOUBLE = FEATURE_SET_COMPUTE_13 -}; - -// Gives information about what GPU archs this OpenCV GPU module was -// compiled for -class CV_EXPORTS TargetArchs -{ -public: - static bool builtWith(FeatureSet feature_set); - static bool has(int major, int minor); - static bool hasPtx(int major, int minor); - static bool hasBin(int major, int minor); - static bool hasEqualOrLessPtx(int major, int minor); - static bool hasEqualOrGreater(int major, int minor); - static bool hasEqualOrGreaterPtx(int major, int minor); - static bool hasEqualOrGreaterBin(int major, int minor); -private: - TargetArchs(); -}; - -// Gives information about the given GPU -class CV_EXPORTS DeviceInfo -{ -public: - // Creates DeviceInfo object for the current GPU - DeviceInfo() : device_id_(getDevice()) { query(); } - - // Creates DeviceInfo object for the given GPU - DeviceInfo(int device_id) : device_id_(device_id) { query(); } - - std::string name() const { return name_; } - - // Return compute capability versions - int majorVersion() const { return majorVersion_; } - int minorVersion() const { return minorVersion_; } - - int multiProcessorCount() const { return multi_processor_count_; } - - size_t freeMemory() const; - size_t totalMemory() const; - - // Checks whether device supports the given feature - bool supports(FeatureSet feature_set) const; - - // Checks whether the GPU module can be run on the given device - bool isCompatible() const; - - int deviceID() const { return device_id_; } - -private: - void query(); - void queryMemory(size_t& free_memory, size_t& total_memory) const; - - int device_id_; - - std::string name_; - int multi_processor_count_; - int majorVersion_; - int minorVersion_; -}; - -CV_EXPORTS void printCudaDeviceInfo(int device); -CV_EXPORTS void printShortCudaDeviceInfo(int device); - //////////////////////////////// CudaMem //////////////////////////////// // CudaMem is limited cv::Mat with page locked memory allocation. // Page locked memory is only needed for async and faster coping to GPU. diff --git a/modules/gpu/src/initialization.cpp b/modules/gpu/src/initialization.cpp deleted file mode 100644 index e30f878df..000000000 --- a/modules/gpu/src/initialization.cpp +++ /dev/null @@ -1,426 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. 
-// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "precomp.hpp" - -using namespace cv; -using namespace cv::gpu; - - -namespace -{ - // Compares value to set using the given comparator. Returns true if - // there is at least one element x in the set satisfying to: x cmp value - // predicate. - template - bool compareToSet(const std::string& set_as_str, int value, Comparer cmp) - { - if (set_as_str.find_first_not_of(" ") == string::npos) - return false; - - std::stringstream stream(set_as_str); - int cur_value; - - while (!stream.eof()) - { - stream >> cur_value; - if (cmp(cur_value, value)) - return true; - } - - return false; - } -} - - -bool cv::gpu::TargetArchs::builtWith(cv::gpu::FeatureSet feature_set) -{ -#if defined (HAVE_CUDA) - return ::compareToSet(CUDA_ARCH_FEATURES, feature_set, std::greater_equal()); -#else - (void)feature_set; - return false; -#endif -} - - -bool cv::gpu::TargetArchs::has(int major, int minor) -{ - return hasPtx(major, minor) || hasBin(major, minor); -} - - -bool cv::gpu::TargetArchs::hasPtx(int major, int minor) -{ -#if defined (HAVE_CUDA) - return ::compareToSet(CUDA_ARCH_PTX, major * 10 + minor, std::equal_to()); -#else - (void)major; - (void)minor; - return false; -#endif -} - - -bool cv::gpu::TargetArchs::hasBin(int major, int minor) -{ -#if defined (HAVE_CUDA) - return ::compareToSet(CUDA_ARCH_BIN, major * 10 + minor, std::equal_to()); -#else - (void)major; - (void)minor; - return false; -#endif -} - - -bool cv::gpu::TargetArchs::hasEqualOrLessPtx(int major, int minor) -{ -#if defined (HAVE_CUDA) - return ::compareToSet(CUDA_ARCH_PTX, major * 10 + minor, - std::less_equal()); -#else - (void)major; - (void)minor; - return false; -#endif -} - - -bool cv::gpu::TargetArchs::hasEqualOrGreater(int major, int minor) -{ - return hasEqualOrGreaterPtx(major, minor) || - hasEqualOrGreaterBin(major, minor); -} - - -bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int major, int minor) -{ -#if defined (HAVE_CUDA) - return ::compareToSet(CUDA_ARCH_PTX, major * 10 + minor, - std::greater_equal()); -#else - (void)major; - (void)minor; - 
return false; -#endif -} - - -bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int major, int minor) -{ -#if defined (HAVE_CUDA) - return ::compareToSet(CUDA_ARCH_BIN, major * 10 + minor, - std::greater_equal()); -#else - (void)major; - (void)minor; - return false; -#endif -} - - -#if !defined (HAVE_CUDA) - -int cv::gpu::getCudaEnabledDeviceCount() { return 0; } -void cv::gpu::setDevice(int) { throw_nogpu(); } -int cv::gpu::getDevice() { throw_nogpu(); return 0; } -void cv::gpu::resetDevice() { throw_nogpu(); } -size_t cv::gpu::DeviceInfo::freeMemory() const { throw_nogpu(); return 0; } -size_t cv::gpu::DeviceInfo::totalMemory() const { throw_nogpu(); return 0; } -bool cv::gpu::DeviceInfo::supports(cv::gpu::FeatureSet) const { throw_nogpu(); return false; } -bool cv::gpu::DeviceInfo::isCompatible() const { throw_nogpu(); return false; } -void cv::gpu::DeviceInfo::query() { throw_nogpu(); } -void cv::gpu::DeviceInfo::queryMemory(size_t&, size_t&) const { throw_nogpu(); } -void cv::gpu::printCudaDeviceInfo(int) { throw_nogpu(); } -void cv::gpu::printShortCudaDeviceInfo(int) { throw_nogpu(); } - -#else /* !defined (HAVE_CUDA) */ - -int cv::gpu::getCudaEnabledDeviceCount() -{ - int count; - cudaError_t error = cudaGetDeviceCount( &count ); - - if (error == cudaErrorInsufficientDriver) - return -1; - - if (error == cudaErrorNoDevice) - return 0; - - cudaSafeCall(error); - return count; -} - - -void cv::gpu::setDevice(int device) -{ - cudaSafeCall( cudaSetDevice( device ) ); -} - - -int cv::gpu::getDevice() -{ - int device; - cudaSafeCall( cudaGetDevice( &device ) ); - return device; -} - - -void cv::gpu::resetDevice() -{ - cudaSafeCall( cudaDeviceReset() ); -} - - -size_t cv::gpu::DeviceInfo::freeMemory() const -{ - size_t free_memory, total_memory; - queryMemory(free_memory, total_memory); - return free_memory; -} - - -size_t cv::gpu::DeviceInfo::totalMemory() const -{ - size_t free_memory, total_memory; - queryMemory(free_memory, total_memory); - return total_memory; -} - - -bool cv::gpu::DeviceInfo::supports(cv::gpu::FeatureSet feature_set) const -{ - int version = majorVersion() * 10 + minorVersion(); - return version >= feature_set; -} - - -bool cv::gpu::DeviceInfo::isCompatible() const -{ - // Check PTX compatibility - if (TargetArchs::hasEqualOrLessPtx(majorVersion(), minorVersion())) - return true; - - // Check BIN compatibility - for (int i = minorVersion(); i >= 0; --i) - if (TargetArchs::hasBin(majorVersion(), i)) - return true; - - return false; -} - - -void cv::gpu::DeviceInfo::query() -{ - cudaDeviceProp prop; - cudaSafeCall(cudaGetDeviceProperties(&prop, device_id_)); - name_ = prop.name; - multi_processor_count_ = prop.multiProcessorCount; - majorVersion_ = prop.major; - minorVersion_ = prop.minor; -} - - -void cv::gpu::DeviceInfo::queryMemory(size_t& free_memory, size_t& total_memory) const -{ - int prev_device_id = getDevice(); - if (prev_device_id != device_id_) - setDevice(device_id_); - - cudaSafeCall(cudaMemGetInfo(&free_memory, &total_memory)); - - if (prev_device_id != device_id_) - setDevice(prev_device_id); -} - -namespace -{ - template void getCudaAttribute(T *attribute, CUdevice_attribute device_attribute, int device) - { - *attribute = T(); - CUresult error = CUDA_SUCCESS;// = cuDeviceGetAttribute( attribute, device_attribute, device ); why link erros under ubuntu?? 
- if( CUDA_SUCCESS == error ) - return; - - printf("Driver API error = %04d\n", error); - cv::gpu::error("driver API error", __FILE__, __LINE__); - } - - int convertSMVer2Cores(int major, int minor) - { - // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM - typedef struct { - int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version - int Cores; - } SMtoCores; - - SMtoCores gpuArchCoresPerSM[] = { { 0x10, 8 }, { 0x11, 8 }, { 0x12, 8 }, { 0x13, 8 }, { 0x20, 32 }, { 0x21, 48 }, { -1, -1 } }; - - int index = 0; - while (gpuArchCoresPerSM[index].SM != -1) - { - if (gpuArchCoresPerSM[index].SM == ((major << 4) + minor) ) - return gpuArchCoresPerSM[index].Cores; - index++; - } - printf("MapSMtoCores undefined SMversion %d.%d!\n", major, minor); - return -1; - } -} - -void cv::gpu::printCudaDeviceInfo(int device) -{ - int count = getCudaEnabledDeviceCount(); - bool valid = (device >= 0) && (device < count); - - int beg = valid ? device : 0; - int end = valid ? device+1 : count; - - printf("*** CUDA Device Query (Runtime API) version (CUDART static linking) *** \n\n"); - printf("Device count: %d\n", count); - - int driverVersion = 0, runtimeVersion = 0; - cudaSafeCall( cudaDriverGetVersion(&driverVersion) ); - cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) ); - - const char *computeMode[] = { - "Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)", - "Exclusive (only one host thread in one process is able to use ::cudaSetDevice() with this device)", - "Prohibited (no host thread can use ::cudaSetDevice() with this device)", - "Exclusive Process (many threads in one process is able to use ::cudaSetDevice() with this device)", - "Unknown", - NULL - }; - - for(int dev = beg; dev < end; ++dev) - { - cudaDeviceProp prop; - cudaSafeCall( cudaGetDeviceProperties(&prop, dev) ); - - printf("\nDevice %d: \"%s\"\n", dev, prop.name); - printf(" CUDA Driver Version / Runtime Version %d.%d / %d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100); - printf(" CUDA Capability Major/Minor version number: %d.%d\n", prop.major, prop.minor); - printf(" Total amount of global memory: %.0f MBytes (%llu bytes)\n", (float)prop.totalGlobalMem/1048576.0f, (unsigned long long) prop.totalGlobalMem); - printf(" (%2d) Multiprocessors x (%2d) CUDA Cores/MP: %d CUDA Cores\n", - prop.multiProcessorCount, convertSMVer2Cores(prop.major, prop.minor), - convertSMVer2Cores(prop.major, prop.minor) * prop.multiProcessorCount); - printf(" GPU Clock Speed: %.2f GHz\n", prop.clockRate * 1e-6f); - - // This is not available in the CUDA Runtime API, so we make the necessary calls the driver API to support this for output - int memoryClock, memBusWidth, L2CacheSize; - getCudaAttribute( &memoryClock, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, dev ); - getCudaAttribute( &memBusWidth, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, dev ); - getCudaAttribute( &L2CacheSize, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, dev ); - - printf(" Memory Clock rate: %.2f Mhz\n", memoryClock * 1e-3f); - printf(" Memory Bus Width: %d-bit\n", memBusWidth); - if (L2CacheSize) - printf(" L2 Cache Size: %d bytes\n", L2CacheSize); - - printf(" Max Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d,%d), 3D=(%d,%d,%d)\n", - prop.maxTexture1D, prop.maxTexture2D[0], prop.maxTexture2D[1], - prop.maxTexture3D[0], prop.maxTexture3D[1], prop.maxTexture3D[2]); - printf(" Max Layered Texture Size (dim) x layers 1D=(%d) x %d, 2D=(%d,%d) x %d\n", - 
-void cv::gpu::printCudaDeviceInfo(int device)
-{
-    int count = getCudaEnabledDeviceCount();
-    bool valid = (device >= 0) && (device < count);
-
-    int beg = valid ? device   : 0;
-    int end = valid ? device+1 : count;
-
-    printf("*** CUDA Device Query (Runtime API) version (CUDART static linking) *** \n\n");
-    printf("Device count: %d\n", count);
-
-    int driverVersion = 0, runtimeVersion = 0;
-    cudaSafeCall( cudaDriverGetVersion(&driverVersion) );
-    cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
-
-    const char *computeMode[] = {
-        "Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)",
-        "Exclusive (only one host thread in one process is able to use ::cudaSetDevice() with this device)",
-        "Prohibited (no host thread can use ::cudaSetDevice() with this device)",
-        "Exclusive Process (many threads in one process is able to use ::cudaSetDevice() with this device)",
-        "Unknown",
-        NULL
-    };
-
-    for(int dev = beg; dev < end; ++dev)
-    {
-        cudaDeviceProp prop;
-        cudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
-
-        printf("\nDevice %d: \"%s\"\n", dev, prop.name);
-        printf("  CUDA Driver Version / Runtime Version          %d.%d / %d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100);
-        printf("  CUDA Capability Major/Minor version number:    %d.%d\n", prop.major, prop.minor);
-        printf("  Total amount of global memory:                 %.0f MBytes (%llu bytes)\n", (float)prop.totalGlobalMem/1048576.0f, (unsigned long long) prop.totalGlobalMem);
-        printf("  (%2d) Multiprocessors x (%2d) CUDA Cores/MP:     %d CUDA Cores\n",
-            prop.multiProcessorCount, convertSMVer2Cores(prop.major, prop.minor),
-            convertSMVer2Cores(prop.major, prop.minor) * prop.multiProcessorCount);
-        printf("  GPU Clock Speed:                               %.2f GHz\n", prop.clockRate * 1e-6f);
-
-        // This is not available in the CUDA Runtime API, so we make the necessary calls the driver API to support this for output
-        int memoryClock, memBusWidth, L2CacheSize;
-        getCudaAttribute<int>( &memoryClock, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, dev );
-        getCudaAttribute<int>( &memBusWidth, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, dev );
-        getCudaAttribute<int>( &L2CacheSize, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, dev );
-
-        printf("  Memory Clock rate:                             %.2f Mhz\n", memoryClock * 1e-3f);
-        printf("  Memory Bus Width:                              %d-bit\n", memBusWidth);
-        if (L2CacheSize)
-            printf("  L2 Cache Size:                                 %d bytes\n", L2CacheSize);
-
-        printf("  Max Texture Dimension Size (x,y,z)             1D=(%d), 2D=(%d,%d), 3D=(%d,%d,%d)\n",
-            prop.maxTexture1D, prop.maxTexture2D[0], prop.maxTexture2D[1],
-            prop.maxTexture3D[0], prop.maxTexture3D[1], prop.maxTexture3D[2]);
-        printf("  Max Layered Texture Size (dim) x layers        1D=(%d) x %d, 2D=(%d,%d) x %d\n",
-            prop.maxTexture1DLayered[0], prop.maxTexture1DLayered[1],
-            prop.maxTexture2DLayered[0], prop.maxTexture2DLayered[1], prop.maxTexture2DLayered[2]);
-
-        printf("  Total amount of constant memory:               %u bytes\n", (int)prop.totalConstMem);
-        printf("  Total amount of shared memory per block:       %u bytes\n", (int)prop.sharedMemPerBlock);
-        printf("  Total number of registers available per block: %d\n", prop.regsPerBlock);
-        printf("  Warp size:                                     %d\n", prop.warpSize);
-        printf("  Maximum number of threads per block:           %d\n", prop.maxThreadsPerBlock);
-        printf("  Maximum sizes of each dimension of a block:    %d x %d x %d\n", prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2]);
-        printf("  Maximum sizes of each dimension of a grid:     %d x %d x %d\n", prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2]);
-        printf("  Maximum memory pitch:                          %u bytes\n", (int)prop.memPitch);
-        printf("  Texture alignment:                             %u bytes\n", (int)prop.textureAlignment);
-
-        printf("  Concurrent copy and execution:                 %s with %d copy engine(s)\n", (prop.deviceOverlap ? "Yes" : "No"), prop.asyncEngineCount);
-        printf("  Run time limit on kernels:                     %s\n", prop.kernelExecTimeoutEnabled ? "Yes" : "No");
-        printf("  Integrated GPU sharing Host Memory:            %s\n", prop.integrated ? "Yes" : "No");
-        printf("  Support host page-locked memory mapping:       %s\n", prop.canMapHostMemory ? "Yes" : "No");
-
-        printf("  Concurrent kernel execution:                   %s\n", prop.concurrentKernels ? "Yes" : "No");
-        printf("  Alignment requirement for Surfaces:            %s\n", prop.surfaceAlignment ? "Yes" : "No");
-        printf("  Device has ECC support enabled:                %s\n", prop.ECCEnabled ? "Yes" : "No");
-        printf("  Device is using TCC driver mode:               %s\n", prop.tccDriver ? "Yes" : "No");
-        printf("  Device supports Unified Addressing (UVA):      %s\n", prop.unifiedAddressing ? "Yes" : "No");
-        printf("  Device PCI Bus ID / PCI location ID:           %d / %d\n", prop.pciBusID, prop.pciDeviceID );
-        printf("  Compute Mode:\n");
-        printf("      %s \n", computeMode[prop.computeMode]);
-    }
-
-    printf("\n");
-    printf("deviceQuery, CUDA Driver = CUDART");
-    printf(", CUDA Driver Version  = %d.%d", driverVersion / 1000, driverVersion % 100);
-    printf(", CUDA Runtime Version = %d.%d", runtimeVersion/1000, runtimeVersion%100);
-    printf(", NumDevs = %d\n\n", count);
-    fflush(stdout);
-}
-
-void cv::gpu::printShortCudaDeviceInfo(int device)
-{
-    int count = getCudaEnabledDeviceCount();
-    bool valid = (device >= 0) && (device < count);
-
-    int beg = valid ? device   : 0;
-    int end = valid ? device+1 : count;
-
-    int driverVersion = 0, runtimeVersion = 0;
-    cudaSafeCall( cudaDriverGetVersion(&driverVersion) );
-    cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
-
-    for(int dev = beg; dev < end; ++dev)
-    {
-        cudaDeviceProp prop;
-        cudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
-
-        const char *arch_str = prop.major < 2 ? " (not Fermi)" : "";
" (not Fermi)" : ""; - printf("Device %d: \"%s\" %.0fMb", dev, prop.name, (float)prop.totalGlobalMem/1048576.0f); - printf(", sm_%d%d%s, %d cores", prop.major, prop.minor, arch_str, convertSMVer2Cores(prop.major, prop.minor) * prop.multiProcessorCount); - printf(", Driver/Runtime ver.%d.%d/%d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100); - } - fflush(stdout); -} - -#endif - diff --git a/modules/gpu/src/matrix_reductions.cpp b/modules/gpu/src/matrix_reductions.cpp index 223d6c8a2..4fa81e155 100644 --- a/modules/gpu/src/matrix_reductions.cpp +++ b/modules/gpu/src/matrix_reductions.cpp @@ -118,6 +118,9 @@ void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev, GpuMat { CV_Assert(src.type() == CV_8UC1); + if (!TargetArchs::builtWith(FEATURE_SET_COMPUTE_13) || !DeviceInfo().supports(FEATURE_SET_COMPUTE_13)) + CV_Error(CV_StsNotImplemented, "Not sufficient compute capebility"); + NppiSize sz; sz.width = src.cols; sz.height = src.rows; diff --git a/modules/gpu/src/split_merge.cpp b/modules/gpu/src/split_merge.cpp index 0b9f971ad..2d5b1b13c 100644 --- a/modules/gpu/src/split_merge.cpp +++ b/modules/gpu/src/split_merge.cpp @@ -55,10 +55,10 @@ void cv::gpu::split(const GpuMat& /*src*/, vector& /*dst*/, Stream& /*st #else /* !defined (HAVE_CUDA) */ -namespace cv { namespace gpu { namespace device +namespace cv { namespace gpu { namespace device { - namespace split_merge - { + namespace split_merge + { void merge_caller(const DevMem2Db* src, DevMem2Db& dst, int total_channels, size_t elem_size, const cudaStream_t& stream); void split_caller(const DevMem2Db& src, DevMem2Db* dst, int num_channels, size_t elem_size1, const cudaStream_t& stream); } @@ -66,7 +66,7 @@ namespace cv { namespace gpu { namespace device namespace { - void merge(const GpuMat* src, size_t n, GpuMat& dst, const cudaStream_t& stream) + void merge(const GpuMat* src, size_t n, GpuMat& dst, const cudaStream_t& stream) { using namespace ::cv::gpu::device::split_merge; @@ -76,6 +76,12 @@ namespace int depth = src[0].depth(); Size size = src[0].size(); + if (depth == CV_64F) + { + if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE)) + CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); + } + bool single_channel_only = true; int total_channels = 0; @@ -90,9 +96,9 @@ namespace CV_Assert(single_channel_only); CV_Assert(total_channels <= 4); - if (total_channels == 1) + if (total_channels == 1) src[0].copyTo(dst); - else + else { dst.create(size, CV_MAKETYPE(depth, total_channels)); @@ -102,10 +108,10 @@ namespace DevMem2Db dst_as_devmem(dst); merge_caller(src_as_devmem, dst_as_devmem, total_channels, CV_ELEM_SIZE(depth), stream); - } + } } - void split(const GpuMat& src, GpuMat* dst, const cudaStream_t& stream) + void split(const GpuMat& src, GpuMat* dst, const cudaStream_t& stream) { using namespace ::cv::gpu::device::split_merge; @@ -115,6 +121,12 @@ namespace int num_channels = src.channels(); Size size = src.size(); + if (depth == CV_64F) + { + if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE)) + CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); + } + if (num_channels == 1) { src.copyTo(dst[0]); @@ -135,23 +147,23 @@ namespace } } -void cv::gpu::merge(const GpuMat* src, size_t n, GpuMat& dst, Stream& stream) -{ +void cv::gpu::merge(const GpuMat* src, size_t n, GpuMat& dst, Stream& stream) +{ ::merge(src, n, dst, StreamAccessor::getStream(stream)); } -void 
diff --git a/modules/gpu/src/split_merge.cpp b/modules/gpu/src/split_merge.cpp
index 0b9f971ad..2d5b1b13c 100644
--- a/modules/gpu/src/split_merge.cpp
+++ b/modules/gpu/src/split_merge.cpp
@@ -55,10 +55,10 @@ void cv::gpu::split(const GpuMat& /*src*/, vector<GpuMat>& /*dst*/, Stream& /*stream*/)
 
 #else /* !defined (HAVE_CUDA) */
 
-namespace cv { namespace gpu { namespace device
+namespace cv { namespace gpu { namespace device
 {
-    namespace split_merge
-    {
+    namespace split_merge
+    {
         void merge_caller(const DevMem2Db* src, DevMem2Db& dst, int total_channels, size_t elem_size, const cudaStream_t& stream);
         void split_caller(const DevMem2Db& src, DevMem2Db* dst, int num_channels, size_t elem_size1, const cudaStream_t& stream);
     }
@@ -66,7 +66,7 @@ namespace cv { namespace gpu { namespace device
 
 namespace
 {
-    void merge(const GpuMat* src, size_t n, GpuMat& dst, const cudaStream_t& stream)
+    void merge(const GpuMat* src, size_t n, GpuMat& dst, const cudaStream_t& stream)
     {
         using namespace ::cv::gpu::device::split_merge;
 
@@ -76,6 +76,12 @@ namespace
         int depth = src[0].depth();
         Size size = src[0].size();
 
+        if (depth == CV_64F)
+        {
+            if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
+                CV_Error(CV_StsUnsupportedFormat, "The device does not support double precision");
+        }
+
         bool single_channel_only = true;
         int total_channels = 0;
 
@@ -90,9 +96,9 @@ namespace
         CV_Assert(single_channel_only);
         CV_Assert(total_channels <= 4);
 
-        if (total_channels == 1)
+        if (total_channels == 1)
             src[0].copyTo(dst);
-        else
+        else
         {
             dst.create(size, CV_MAKETYPE(depth, total_channels));
 
@@ -102,10 +108,10 @@ namespace
             DevMem2Db dst_as_devmem(dst);
             merge_caller(src_as_devmem, dst_as_devmem, total_channels, CV_ELEM_SIZE(depth), stream);
-        }
+        }
     }
 
-    void split(const GpuMat& src, GpuMat* dst, const cudaStream_t& stream)
+    void split(const GpuMat& src, GpuMat* dst, const cudaStream_t& stream)
     {
         using namespace ::cv::gpu::device::split_merge;
 
@@ -115,6 +121,12 @@ namespace
         int num_channels = src.channels();
         Size size = src.size();
 
+        if (depth == CV_64F)
+        {
+            if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
+                CV_Error(CV_StsUnsupportedFormat, "The device does not support double precision");
+        }
+
         if (num_channels == 1)
         {
             src.copyTo(dst[0]);
@@ -135,23 +147,23 @@ namespace
     }
 }
 
-void cv::gpu::merge(const GpuMat* src, size_t n, GpuMat& dst, Stream& stream)
-{
+void cv::gpu::merge(const GpuMat* src, size_t n, GpuMat& dst, Stream& stream)
+{
     ::merge(src, n, dst, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::merge(const vector<GpuMat>& src, GpuMat& dst, Stream& stream)
+void cv::gpu::merge(const vector<GpuMat>& src, GpuMat& dst, Stream& stream)
 {
     ::merge(&src[0], src.size(), dst, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::split(const GpuMat& src, GpuMat* dst, Stream& stream)
+void cv::gpu::split(const GpuMat& src, GpuMat* dst, Stream& stream)
 {
     ::split(src, dst, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::split(const GpuMat& src, vector<GpuMat>& dst, Stream& stream)
+void cv::gpu::split(const GpuMat& src, vector<GpuMat>& dst, Stream& stream)
 {
     dst.resize(src.channels());
     if(src.channels() > 0)
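split and merge now refuse CV_64F data unless NATIVE_DOUBLE is available on both sides: TargetArchs::builtWith() asks what the module binary was compiled for, while DeviceInfo::supports() asks what the runtime device can do, and both must hold before double kernels may run. A hedged sketch of a reusable probe built on the same pair of checks; the helper name is ours, not part of the patch:

```cpp
#include <opencv2/gpu/gpu.hpp>

// True when CV_64F split/merge (and other double-based GPU code) can run:
// the module must be *built* for sm_13+ and the *current device* must be one.
bool canProcessDoubles()
{
    using namespace cv::gpu;
    return TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE);
}
```

Both checks are needed because a binary compiled without sm_13 code cannot use doubles even on a capable device, and a capable binary still cannot on an older device.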
diff --git a/modules/gpu/test/test_core.cpp b/modules/gpu/test/test_core.cpp
index a00c6fa2a..d0a4634dc 100644
--- a/modules/gpu/test/test_core.cpp
+++ b/modules/gpu/test/test_core.cpp
@@ -43,6 +43,138 @@
 
 namespace {
 
+////////////////////////////////////////////////////////////////////////////////
+// Merge
+
+PARAM_TEST_CASE(Merge, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    int depth;
+    int channels;
+    bool useRoi;
+
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        depth = GET_PARAM(2);
+        channels = GET_PARAM(3);
+        useRoi = GET_PARAM(4);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+TEST_P(Merge, Accuracy)
+{
+    std::vector<cv::Mat> src;
+    src.reserve(channels);
+    for (int i = 0; i < channels; ++i)
+        src.push_back(cv::Mat(size, depth, cv::Scalar::all(i)));
+
+    std::vector<cv::gpu::GpuMat> d_src;
+    for (int i = 0; i < channels; ++i)
+        d_src.push_back(loadMat(src[i], useRoi));
+
+    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        try
+        {
+            cv::gpu::GpuMat dst;
+            cv::gpu::merge(d_src, dst);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
+        }
+    }
+    else
+    {
+        cv::gpu::GpuMat dst;
+        cv::gpu::merge(d_src, dst);
+
+        cv::Mat dst_gold;
+        cv::merge(src, dst_gold);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Core, Merge, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    ALL_DEPTH,
+    testing::Values(1, 2, 3, 4),
+    WHOLE_SUBMAT));
+
+////////////////////////////////////////////////////////////////////////////////
+// Split
+
+PARAM_TEST_CASE(Split, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    int depth;
+    int channels;
+    bool useRoi;
+
+    int type;
+
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        depth = GET_PARAM(2);
+        channels = GET_PARAM(3);
+        useRoi = GET_PARAM(4);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        type = CV_MAKE_TYPE(depth, channels);
+    }
+};
+
+TEST_P(Split, Accuracy)
+{
+    cv::Mat src = randomMat(size, type);
+
+    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        try
+        {
+            std::vector<cv::gpu::GpuMat> dst;
+            cv::gpu::split(loadMat(src), dst);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
+        }
+    }
+    else
+    {
+        std::vector<cv::gpu::GpuMat> dst;
+        cv::gpu::split(loadMat(src, useRoi), dst);
+
+        std::vector<cv::Mat> dst_gold;
+        cv::split(src, dst_gold);
+
+        ASSERT_EQ(dst_gold.size(), dst.size());
+
+        for (size_t i = 0; i < dst_gold.size(); ++i)
+        {
+            EXPECT_MAT_NEAR(dst_gold[i], dst[i], 0.0);
+        }
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Core, Split, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    ALL_DEPTH,
+    testing::Values(1, 2, 3, 4),
+    WHOLE_SUBMAT));
+
 ////////////////////////////////////////////////////////////////////////////////
 // Add_Array
 
@@ -1974,7 +2106,7 @@ TEST_P(AddWeighted, Accuracy)
         cv::Mat dst_gold;
         cv::addWeighted(src1, alpha, src2, beta, gamma, dst_gold, dst_depth);
 
-        EXPECT_MAT_NEAR(dst_gold, dst, dst_depth < CV_32F ? 1.0 : 1e-12);
+        EXPECT_MAT_NEAR(dst_gold, dst, dst_depth < CV_32F ? 1.0 : 1e-3);
     }
 }
 
@@ -2487,16 +2619,32 @@ TEST_P(MeanStdDev, Accuracy)
 {
     cv::Mat src = randomMat(size, CV_8UC1);
 
-    cv::Scalar mean;
-    cv::Scalar stddev;
-    cv::gpu::meanStdDev(loadMat(src, useRoi), mean, stddev);
+    if (!supportFeature(devInfo, cv::gpu::FEATURE_SET_COMPUTE_13))
+    {
+        try
+        {
+            cv::Scalar mean;
+            cv::Scalar stddev;
+            cv::gpu::meanStdDev(loadMat(src, useRoi), mean, stddev);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(CV_StsNotImplemented, e.code);
+        }
+    }
+    else
+    {
+        cv::Scalar mean;
+        cv::Scalar stddev;
+        cv::gpu::meanStdDev(loadMat(src, useRoi), mean, stddev);
 
-    cv::Scalar mean_gold;
-    cv::Scalar stddev_gold;
-    cv::meanStdDev(src, mean_gold, stddev_gold);
+        cv::Scalar mean_gold;
+        cv::Scalar stddev_gold;
+        cv::meanStdDev(src, mean_gold, stddev_gold);
 
-    EXPECT_SCALAR_NEAR(mean_gold, mean, 1e-5);
-    EXPECT_SCALAR_NEAR(stddev_gold, stddev, 1e-5);
+        EXPECT_SCALAR_NEAR(mean_gold, mean, 1e-5);
+        EXPECT_SCALAR_NEAR(stddev_gold, stddev, 1e-5);
+    }
 }
 
 INSTANTIATE_TEST_CASE_P(GPU_Core, MeanStdDev, testing::Combine(
diff --git a/modules/gpu/test/test_gpumat.cpp b/modules/gpu/test/test_gpumat.cpp
new file mode 100644
index 000000000..8457b71a6
--- /dev/null
+++ b/modules/gpu/test/test_gpumat.cpp
@@ -0,0 +1,325 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +namespace { + +//////////////////////////////////////////////////////////////////////////////// +// SetTo + +PARAM_TEST_CASE(SetTo, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi) +{ + cv::gpu::DeviceInfo devInfo; + cv::Size size; + int type; + bool useRoi; + + virtual void SetUp() + { + devInfo = GET_PARAM(0); + size = GET_PARAM(1); + type = GET_PARAM(2); + useRoi = GET_PARAM(3); + + cv::gpu::setDevice(devInfo.deviceID()); + } +}; + +TEST_P(SetTo, Zero) +{ + cv::Scalar zero = cv::Scalar::all(0); + + cv::gpu::GpuMat mat = createMat(size, type, useRoi); + mat.setTo(zero); + + EXPECT_MAT_NEAR(cv::Mat::zeros(size, type), mat, 0.0); +} + +TEST_P(SetTo, SameVal) +{ + cv::Scalar val = cv::Scalar::all(randomDouble(0.0, 255.0)); + + if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) + { + try + { + cv::gpu::GpuMat mat = createMat(size, type, useRoi); + mat.setTo(val); + } + catch (const cv::Exception& e) + { + ASSERT_EQ(CV_StsUnsupportedFormat, e.code); + } + } + else + { + cv::gpu::GpuMat mat = createMat(size, type, useRoi); + mat.setTo(val); + + EXPECT_MAT_NEAR(cv::Mat(size, type, val), mat, 0.0); + } +} + +TEST_P(SetTo, DifferentVal) +{ + cv::Scalar val = randomScalar(0.0, 255.0); + + if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) + { + try + { + cv::gpu::GpuMat mat = createMat(size, type, useRoi); + mat.setTo(val); + } + catch (const cv::Exception& e) + { + ASSERT_EQ(CV_StsUnsupportedFormat, e.code); + } + } + else + { + cv::gpu::GpuMat mat = createMat(size, type, useRoi); + mat.setTo(val); + + EXPECT_MAT_NEAR(cv::Mat(size, type, val), mat, 0.0); + } +} + +TEST_P(SetTo, Masked) +{ + cv::Scalar val = randomScalar(0.0, 255.0); + cv::Mat mat_gold = randomMat(size, type); + cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0); + + if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) + { + try + { + cv::gpu::GpuMat mat = createMat(size, type, useRoi); + mat.setTo(val, loadMat(mask)); + } + catch (const cv::Exception& e) + { + ASSERT_EQ(CV_StsUnsupportedFormat, e.code); + } + } + else + { + cv::gpu::GpuMat mat = loadMat(mat_gold, useRoi); + mat.setTo(val, loadMat(mask, useRoi)); + + mat_gold.setTo(val, mask); + + EXPECT_MAT_NEAR(mat_gold, mat, 0.0); + } +} + +INSTANTIATE_TEST_CASE_P(GPU_GpuMat, SetTo, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + ALL_TYPES, + WHOLE_SUBMAT)); + +//////////////////////////////////////////////////////////////////////////////// +// CopyTo + +PARAM_TEST_CASE(CopyTo, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi) +{ + cv::gpu::DeviceInfo devInfo; + cv::Size size; + int type; + bool useRoi; + + + virtual void SetUp() + { + devInfo = GET_PARAM(0); + size = GET_PARAM(1); + type = GET_PARAM(2); + useRoi = GET_PARAM(3); + + cv::gpu::setDevice(devInfo.deviceID()); + } +}; + +TEST_P(CopyTo, WithOutMask) +{ + cv::Mat src = randomMat(size, type); + + cv::gpu::GpuMat d_src = loadMat(src, useRoi); + 
cv::gpu::GpuMat dst = createMat(size, type, useRoi); + d_src.copyTo(dst); + + EXPECT_MAT_NEAR(src, dst, 0.0); +} + +TEST_P(CopyTo, Masked) +{ + cv::Mat src = randomMat(size, type); + cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0); + + if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) + { + try + { + cv::gpu::GpuMat d_src = loadMat(src); + cv::gpu::GpuMat dst; + d_src.copyTo(dst, loadMat(mask, useRoi)); + } + catch (const cv::Exception& e) + { + ASSERT_EQ(CV_StsUnsupportedFormat, e.code); + } + } + else + { + cv::gpu::GpuMat d_src = loadMat(src, useRoi); + cv::gpu::GpuMat dst = loadMat(cv::Mat::zeros(size, type), useRoi); + d_src.copyTo(dst, loadMat(mask, useRoi)); + + cv::Mat dst_gold = cv::Mat::zeros(size, type); + src.copyTo(dst_gold, mask); + + EXPECT_MAT_NEAR(dst_gold, dst, 0.0); + } +} + +INSTANTIATE_TEST_CASE_P(GPU_GpuMat, CopyTo, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + ALL_TYPES, + WHOLE_SUBMAT)); + +//////////////////////////////////////////////////////////////////////////////// +// ConvertTo + +PARAM_TEST_CASE(ConvertTo, cv::gpu::DeviceInfo, cv::Size, MatDepth, MatDepth, UseRoi) +{ + cv::gpu::DeviceInfo devInfo; + cv::Size size; + int depth1; + int depth2; + bool useRoi; + + virtual void SetUp() + { + devInfo = GET_PARAM(0); + size = GET_PARAM(1); + depth1 = GET_PARAM(2); + depth2 = GET_PARAM(3); + useRoi = GET_PARAM(4); + + cv::gpu::setDevice(devInfo.deviceID()); + } +}; + +TEST_P(ConvertTo, WithOutScaling) +{ + cv::Mat src = randomMat(size, depth1); + + if ((depth1 == CV_64F || depth2 == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) + { + try + { + cv::gpu::GpuMat d_src = loadMat(src); + cv::gpu::GpuMat dst; + d_src.convertTo(dst, depth2); + } + catch (const cv::Exception& e) + { + ASSERT_EQ(CV_StsUnsupportedFormat, e.code); + } + } + else + { + cv::gpu::GpuMat d_src = loadMat(src, useRoi); + cv::gpu::GpuMat dst = createMat(size, depth2, useRoi); + d_src.convertTo(dst, depth2); + + cv::Mat dst_gold; + src.convertTo(dst_gold, depth2); + + EXPECT_MAT_NEAR(dst_gold, dst, 0.0); + } +} + +TEST_P(ConvertTo, WithScaling) +{ + cv::Mat src = randomMat(size, depth1); + double a = randomDouble(0.0, 1.0); + double b = randomDouble(-10.0, 10.0); + + if ((depth1 == CV_64F || depth2 == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) + { + try + { + cv::gpu::GpuMat d_src = loadMat(src); + cv::gpu::GpuMat dst; + d_src.convertTo(dst, depth2, a, b); + } + catch (const cv::Exception& e) + { + ASSERT_EQ(CV_StsUnsupportedFormat, e.code); + } + } + else + { + cv::gpu::GpuMat d_src = loadMat(src, useRoi); + cv::gpu::GpuMat dst = createMat(size, depth2, useRoi); + d_src.convertTo(dst, depth2, a, b); + + cv::Mat dst_gold; + src.convertTo(dst_gold, depth2, a, b); + + EXPECT_MAT_NEAR(dst_gold, dst, depth2 < CV_32F ? 0.0 : 1e-4); + } +} + +INSTANTIATE_TEST_CASE_P(GPU_GpuMat, ConvertTo, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + ALL_DEPTH, + ALL_DEPTH, + WHOLE_SUBMAT)); + +} // namespace diff --git a/modules/gpu/test/test_hog.cpp b/modules/gpu/test/test_hog.cpp deleted file mode 100644 index df04417e5..000000000 --- a/modules/gpu/test/test_hog.cpp +++ /dev/null @@ -1,323 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. 
-// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// Intel License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000, Intel Corporation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of Intel Corporation may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. 
-// -//M*/ - -#include "precomp.hpp" - -#ifdef HAVE_CUDA - -using namespace cvtest; -using namespace testing; - -//#define DUMP - -struct CV_GpuHogDetectTestRunner : cv::gpu::HOGDescriptor -{ - void run() - { - cv::Mat img_rgb = readImage("hog/road.png"); - ASSERT_FALSE(img_rgb.empty()); - -#ifdef DUMP - f.open((std::string(cvtest::TS::ptr()->get_data_path()) + "hog/expected_output.bin").c_str(), std::ios_base::binary); - ASSERT_TRUE(f.is_open()); -#else - f.open((std::string(cvtest::TS::ptr()->get_data_path()) + "hog/expected_output.bin").c_str(), std::ios_base::binary); - ASSERT_TRUE(f.is_open()); -#endif - - // Test on color image - cv::Mat img; - cv::cvtColor(img_rgb, img, CV_BGR2BGRA); - test(img); - - // Test on gray image - cv::cvtColor(img_rgb, img, CV_BGR2GRAY); - test(img); - - f.close(); - } - -#ifdef DUMP - void dump(const cv::Mat& block_hists, const std::vector& locations) - { - f.write((char*)&block_hists.rows, sizeof(block_hists.rows)); - f.write((char*)&block_hists.cols, sizeof(block_hists.cols)); - for (int i = 0; i < block_hists.rows; ++i) - { - for (int j = 0; j < block_hists.cols; ++j) - { - float val = block_hists.at(i, j); - f.write((char*)&val, sizeof(val)); - } - } - int nlocations = locations.size(); - f.write((char*)&nlocations, sizeof(nlocations)); - for (int i = 0; i < locations.size(); ++i) - f.write((char*)&locations[i], sizeof(locations[i])); - } -#else - void compare(const cv::Mat& block_hists, const std::vector& locations) - { - int rows, cols; - int nlocations; - - f.read((char*)&rows, sizeof(rows)); - f.read((char*)&cols, sizeof(cols)); - ASSERT_EQ(rows, block_hists.rows); - ASSERT_EQ(cols, block_hists.cols); - for (int i = 0; i < block_hists.rows; ++i) - { - for (int j = 0; j < block_hists.cols; ++j) - { - float val; - f.read((char*)&val, sizeof(val)); - ASSERT_NEAR(val, block_hists.at(i, j), 1e-3); - } - } - f.read((char*)&nlocations, sizeof(nlocations)); - ASSERT_EQ(nlocations, static_cast(locations.size())); - for (int i = 0; i < nlocations; ++i) - { - cv::Point location; - f.read((char*)&location, sizeof(location)); - ASSERT_EQ(location, locations[i]); - } - } -#endif - - void test(const cv::Mat& img) - { - cv::gpu::GpuMat d_img(img); - - gamma_correction = false; - setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector()); - //cpu detector may be updated soon - //hog.setSVMDetector(cv::HOGDescriptor::getDefaultPeopleDetector()); - - std::vector locations; - - // Test detect - detect(d_img, locations, 0); - -#ifdef DUMP - dump(block_hists, locations); -#else - compare(cv::Mat(block_hists), locations); -#endif - - // Test detect on smaller image - cv::Mat img2; - cv::resize(img, img2, cv::Size(img.cols / 2, img.rows / 2)); - detect(cv::gpu::GpuMat(img2), locations, 0); - -#ifdef DUMP - dump(block_hists, locations); -#else - compare(cv::Mat(block_hists), locations); -#endif - - // Test detect on greater image - cv::resize(img, img2, cv::Size(img.cols * 2, img.rows * 2)); - detect(cv::gpu::GpuMat(img2), locations, 0); - -#ifdef DUMP - dump(block_hists, locations); -#else - compare(cv::Mat(block_hists), locations); -#endif - } - -#ifdef DUMP - std::ofstream f; -#else - std::ifstream f; -#endif -}; - -struct Detect : TestWithParam -{ - cv::gpu::DeviceInfo devInfo; - - virtual void SetUp() - { - devInfo = GetParam(); - - cv::gpu::setDevice(devInfo.deviceID()); - } -}; - -TEST_P(Detect, Accuracy) -{ - CV_GpuHogDetectTestRunner runner; - runner.run(); -} - -INSTANTIATE_TEST_CASE_P(HOG, Detect, ALL_DEVICES); - -struct 
CV_GpuHogGetDescriptorsTestRunner : cv::gpu::HOGDescriptor -{ - CV_GpuHogGetDescriptorsTestRunner(): cv::gpu::HOGDescriptor(cv::Size(64, 128)) {} - - void run() - { - // Load image (e.g. train data, composed from windows) - cv::Mat img_rgb = readImage("hog/train_data.png"); - ASSERT_FALSE(img_rgb.empty()); - - // Convert to C4 - cv::Mat img; - cv::cvtColor(img_rgb, img, CV_BGR2BGRA); - - cv::gpu::GpuMat d_img(img); - - // Convert train images into feature vectors (train table) - cv::gpu::GpuMat descriptors, descriptors_by_cols; - getDescriptors(d_img, win_size, descriptors, DESCR_FORMAT_ROW_BY_ROW); - getDescriptors(d_img, win_size, descriptors_by_cols, DESCR_FORMAT_COL_BY_COL); - - // Check size of the result train table - wins_per_img_x = 3; - wins_per_img_y = 2; - blocks_per_win_x = 7; - blocks_per_win_y = 15; - block_hist_size = 36; - cv::Size descr_size_expected = cv::Size(blocks_per_win_x * blocks_per_win_y * block_hist_size, - wins_per_img_x * wins_per_img_y); - ASSERT_EQ(descr_size_expected, descriptors.size()); - - // Check both formats of output descriptors are handled correctly - cv::Mat dr(descriptors); - cv::Mat dc(descriptors_by_cols); - for (int i = 0; i < wins_per_img_x * wins_per_img_y; ++i) - { - const float* l = dr.rowRange(i, i + 1).ptr(); - const float* r = dc.rowRange(i, i + 1).ptr(); - for (int y = 0; y < blocks_per_win_y; ++y) - for (int x = 0; x < blocks_per_win_x; ++x) - for (int k = 0; k < block_hist_size; ++k) - ASSERT_EQ(l[(y * blocks_per_win_x + x) * block_hist_size + k], - r[(x * blocks_per_win_y + y) * block_hist_size + k]); - } - - /* Now we want to extract the same feature vectors, but from single images. NOTE: results will - be defferent, due to border values interpolation. Using of many small images is slower, however we - wont't call getDescriptors and will use computeBlockHistograms instead of. 
computeBlockHistograms - works good, it can be checked in the gpu_hog sample */ - - img_rgb = readImage("hog/positive1.png"); - ASSERT_TRUE(!img_rgb.empty()); - cv::cvtColor(img_rgb, img, CV_BGR2BGRA); - computeBlockHistograms(cv::gpu::GpuMat(img)); - // Everything is fine with interpolation for left top subimage - ASSERT_EQ(0.0, cv::norm((cv::Mat)block_hists, (cv::Mat)descriptors.rowRange(0, 1))); - - img_rgb = readImage("hog/positive2.png"); - ASSERT_TRUE(!img_rgb.empty()); - cv::cvtColor(img_rgb, img, CV_BGR2BGRA); - computeBlockHistograms(cv::gpu::GpuMat(img)); - compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(1, 2))); - - img_rgb = readImage("hog/negative1.png"); - ASSERT_TRUE(!img_rgb.empty()); - cv::cvtColor(img_rgb, img, CV_BGR2BGRA); - computeBlockHistograms(cv::gpu::GpuMat(img)); - compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(2, 3))); - - img_rgb = readImage("hog/negative2.png"); - ASSERT_TRUE(!img_rgb.empty()); - cv::cvtColor(img_rgb, img, CV_BGR2BGRA); - computeBlockHistograms(cv::gpu::GpuMat(img)); - compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(3, 4))); - - img_rgb = readImage("hog/positive3.png"); - ASSERT_TRUE(!img_rgb.empty()); - cv::cvtColor(img_rgb, img, CV_BGR2BGRA); - computeBlockHistograms(cv::gpu::GpuMat(img)); - compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(4, 5))); - - img_rgb = readImage("hog/negative3.png"); - ASSERT_TRUE(!img_rgb.empty()); - cv::cvtColor(img_rgb, img, CV_BGR2BGRA); - computeBlockHistograms(cv::gpu::GpuMat(img)); - compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(5, 6))); - } - - // Does not compare border value, as interpolation leads to delta - void compare_inner_parts(cv::Mat d1, cv::Mat d2) - { - for (int i = 1; i < blocks_per_win_y - 1; ++i) - for (int j = 1; j < blocks_per_win_x - 1; ++j) - for (int k = 0; k < block_hist_size; ++k) - { - float a = d1.at(0, (i * blocks_per_win_x + j) * block_hist_size); - float b = d2.at(0, (i * blocks_per_win_x + j) * block_hist_size); - ASSERT_FLOAT_EQ(a, b); - } - } - - int wins_per_img_x; - int wins_per_img_y; - int blocks_per_win_x; - int blocks_per_win_y; - int block_hist_size; -}; - -struct GetDescriptors : TestWithParam -{ - cv::gpu::DeviceInfo devInfo; - - virtual void SetUp() - { - devInfo = GetParam(); - - cv::gpu::setDevice(devInfo.deviceID()); - } -}; - -TEST_P(GetDescriptors, Accuracy) -{ - CV_GpuHogGetDescriptorsTestRunner runner; - runner.run(); -} - -INSTANTIATE_TEST_CASE_P(HOG, GetDescriptors, ALL_DEVICES); - -#endif // HAVE_CUDA diff --git a/modules/gpu/test/test_matop.cpp b/modules/gpu/test/test_matop.cpp deleted file mode 100644 index f5bdb8f57..000000000 --- a/modules/gpu/test/test_matop.cpp +++ /dev/null @@ -1,559 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. 
-// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other GpuMaterials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or bpied warranties, including, but not limited to, the bpied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "precomp.hpp" - -#ifdef HAVE_CUDA - -using namespace cvtest; -using namespace testing; - -//////////////////////////////////////////////////////////////////////////////// -// merge - -PARAM_TEST_CASE(Merge, cv::gpu::DeviceInfo, MatType, UseRoi) -{ - cv::gpu::DeviceInfo devInfo; - int type; - bool useRoi; - - cv::Size size; - std::vector src; - - cv::Mat dst_gold; - - virtual void SetUp() - { - devInfo = GET_PARAM(0); - type = GET_PARAM(1); - useRoi = GET_PARAM(2); - - cv::gpu::setDevice(devInfo.deviceID()); - - cv::RNG& rng = TS::ptr()->get_rng(); - - size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150)); - - int depth = CV_MAT_DEPTH(type); - int num_channels = CV_MAT_CN(type); - src.reserve(num_channels); - for (int i = 0; i < num_channels; ++i) - src.push_back(cv::Mat(size, depth, cv::Scalar::all(i))); - - cv::merge(src, dst_gold); - } -}; - -TEST_P(Merge, Accuracy) -{ - if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) - return; - - cv::Mat dst; - - std::vector dev_src; - cv::gpu::GpuMat dev_dst; - - for (size_t i = 0; i < src.size(); ++i) - dev_src.push_back(loadMat(src[i], useRoi)); - - cv::gpu::merge(dev_src, dev_dst); - - dev_dst.download(dst); - - EXPECT_MAT_NEAR(dst_gold, dst, 0.0); -} - -INSTANTIATE_TEST_CASE_P(MatOp, Merge, Combine( - ALL_DEVICES, - ALL_TYPES, - WHOLE_SUBMAT)); - -//////////////////////////////////////////////////////////////////////////////// -// split - -PARAM_TEST_CASE(Split, cv::gpu::DeviceInfo, MatType, UseRoi) -{ - cv::gpu::DeviceInfo devInfo; - int type; - bool useRoi; - - cv::Size size; - cv::Mat src; - - std::vector dst_gold; - - virtual void SetUp() - { - devInfo = GET_PARAM(0); - type = GET_PARAM(1); - useRoi = GET_PARAM(2); - - cv::gpu::setDevice(devInfo.deviceID()); - - cv::RNG& rng = TS::ptr()->get_rng(); - - size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150)); - - src.create(size, type); - src.setTo(cv::Scalar(1.0, 2.0, 3.0, 4.0)); - cv::split(src, dst_gold); - } -}; - -TEST_P(Split, Accuracy) -{ - if (CV_MAT_DEPTH(type) 
== CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) - return; - - std::vector dst; - - std::vector dev_dst; - - cv::gpu::split(loadMat(src, useRoi), dev_dst); - - dst.resize(dev_dst.size()); - - for (size_t i = 0; i < dev_dst.size(); ++i) - dev_dst[i].download(dst[i]); - - ASSERT_EQ(dst_gold.size(), dst.size()); - - for (size_t i = 0; i < dst_gold.size(); ++i) - { - EXPECT_MAT_NEAR(dst_gold[i], dst[i], 0.0); - } -} - -INSTANTIATE_TEST_CASE_P(MatOp, Split, Combine( - ALL_DEVICES, - ALL_TYPES, - WHOLE_SUBMAT)); - -//////////////////////////////////////////////////////////////////////////////// -// split_merge_consistency - -PARAM_TEST_CASE(SplitMerge, cv::gpu::DeviceInfo, MatType) -{ - cv::gpu::DeviceInfo devInfo; - int type; - - cv::Size size; - cv::Mat orig; - - virtual void SetUp() - { - devInfo = GET_PARAM(0); - type = GET_PARAM(1); - - cv::gpu::setDevice(devInfo.deviceID()); - - cv::RNG& rng = TS::ptr()->get_rng(); - - size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150)); - - orig.create(size, type); - orig.setTo(cv::Scalar(1.0, 2.0, 3.0, 4.0)); - } -}; - -TEST_P(SplitMerge, Consistency) -{ - if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) - return; - - cv::Mat final; - - std::vector dev_vec; - cv::gpu::GpuMat dev_final; - - cv::gpu::split(loadMat(orig), dev_vec); - cv::gpu::merge(dev_vec, dev_final); - - dev_final.download(final); - - EXPECT_MAT_NEAR(orig, final, 0.0); -} - -INSTANTIATE_TEST_CASE_P(MatOp, SplitMerge, Combine( - ALL_DEVICES, - ALL_TYPES)); - -//////////////////////////////////////////////////////////////////////////////// -// setTo - -PARAM_TEST_CASE(SetTo, cv::gpu::DeviceInfo, MatType, UseRoi) -{ - cv::gpu::DeviceInfo devInfo; - int type; - bool useRoi; - - cv::Size size; - cv::Mat mat_gold; - - virtual void SetUp() - { - devInfo = GET_PARAM(0); - type = GET_PARAM(1); - useRoi = GET_PARAM(2); - - cv::gpu::setDevice(devInfo.deviceID()); - - cv::RNG& rng = TS::ptr()->get_rng(); - - size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150)); - - mat_gold.create(size, type); - } -}; - -TEST_P(SetTo, Zero) -{ - if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) - return; - - cv::Scalar zero = cv::Scalar::all(0); - - cv::Mat mat; - - cv::gpu::GpuMat dev_mat = loadMat(mat_gold, useRoi); - - mat_gold.setTo(zero); - dev_mat.setTo(zero); - - dev_mat.download(mat); - - EXPECT_MAT_NEAR(mat_gold, mat, 0.0); -} - -TEST_P(SetTo, SameVal) -{ - if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) - return; - - cv::Scalar s = cv::Scalar::all(1); - - cv::Mat mat; - - cv::gpu::GpuMat dev_mat(mat_gold); - - mat_gold.setTo(s); - dev_mat.setTo(s); - - dev_mat.download(mat); - - EXPECT_MAT_NEAR(mat_gold, mat, 0.0); -} - -TEST_P(SetTo, DifferentVal) -{ - if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) - return; - - cv::Scalar s = cv::Scalar(1, 2, 3, 4); - - cv::Mat mat; - - cv::gpu::GpuMat dev_mat = loadMat(mat_gold, useRoi); - - mat_gold.setTo(s); - dev_mat.setTo(s); - - dev_mat.download(mat); - - EXPECT_MAT_NEAR(mat_gold, mat, 0.0); -} - -TEST_P(SetTo, Masked) -{ - if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) - return; - - cv::Scalar s = cv::Scalar(1, 2, 3, 4); - - cv::RNG& rng = TS::ptr()->get_rng(); - cv::Mat mask = randomMat(rng, mat_gold.size(), CV_8UC1, 0.0, 1.5, false); - - cv::Mat mat; - - cv::gpu::GpuMat dev_mat = loadMat(mat_gold, useRoi); - - mat_gold.setTo(s, mask); - 
dev_mat.setTo(s, loadMat(mask, useRoi)); - - dev_mat.download(mat); - - EXPECT_MAT_NEAR(mat_gold, mat, 0.0); -} - -INSTANTIATE_TEST_CASE_P(MatOp, SetTo, Combine( - ALL_DEVICES, - ALL_TYPES, - WHOLE_SUBMAT)); - -//////////////////////////////////////////////////////////////////////////////// -// copyTo - -PARAM_TEST_CASE(CopyTo, cv::gpu::DeviceInfo, MatType, UseRoi) -{ - cv::gpu::DeviceInfo devInfo; - int type; - bool useRoi; - - cv::Size size; - cv::Mat src; - - virtual void SetUp() - { - devInfo = GET_PARAM(0); - type = GET_PARAM(1); - useRoi = GET_PARAM(2); - - cv::gpu::setDevice(devInfo.deviceID()); - - cv::RNG& rng = TS::ptr()->get_rng(); - - size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150)); - - src = randomMat(rng, size, type, 0.0, 127.0, false); - } -}; - -TEST_P(CopyTo, WithoutMask) -{ - if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) - return; - - cv::Mat dst_gold; - src.copyTo(dst_gold); - - cv::Mat dst; - - cv::gpu::GpuMat dev_src = loadMat(src, useRoi); - cv::gpu::GpuMat dev_dst = loadMat(src, useRoi); - - dev_src.copyTo(dev_dst); - - dev_dst.download(dst); - - EXPECT_MAT_NEAR(dst_gold, dst, 0.0); -} - -TEST_P(CopyTo, Masked) -{ - if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) - return; - - cv::RNG& rng = TS::ptr()->get_rng(); - - cv::Mat mask = randomMat(rng, src.size(), CV_8UC1, 0.0, 2.0, false); - - cv::Mat zeroMat(src.size(), src.type(), cv::Scalar::all(0)); - - cv::Mat dst_gold = zeroMat.clone(); - src.copyTo(dst_gold, mask); - - cv::Mat dst; - - cv::gpu::GpuMat dev_src = loadMat(src, useRoi); - cv::gpu::GpuMat dev_dst = loadMat(zeroMat, useRoi); - - dev_src.copyTo(dev_dst, loadMat(mask, useRoi)); - - dev_dst.download(dst); - - EXPECT_MAT_NEAR(dst_gold, dst, 0.0); -} - -INSTANTIATE_TEST_CASE_P(MatOp, CopyTo, Combine( - ALL_DEVICES, - ALL_TYPES, - WHOLE_SUBMAT)); - -//////////////////////////////////////////////////////////////////////////////// -// convertTo - -PARAM_TEST_CASE(ConvertTo, cv::gpu::DeviceInfo, MatType, MatType, UseRoi) -{ - cv::gpu::DeviceInfo devInfo; - int depth1; - int depth2; - bool useRoi; - - cv::Size size; - cv::Mat src; - - virtual void SetUp() - { - devInfo = GET_PARAM(0); - depth1 = GET_PARAM(1); - depth2 = GET_PARAM(2); - useRoi = GET_PARAM(3); - - cv::gpu::setDevice(devInfo.deviceID()); - - cv::RNG& rng = TS::ptr()->get_rng(); - - size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150)); - - src = randomMat(rng, size, depth1, 0.0, 127.0, false); - } -}; - -TEST_P(ConvertTo, WithoutScaling) -{ - if ((depth1 == CV_64F || depth2 == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) - return; - - cv::Mat dst_gold; - src.convertTo(dst_gold, depth2); - - cv::Mat dst; - - cv::gpu::GpuMat dev_src = loadMat(src, useRoi); - cv::gpu::GpuMat dev_dst; - - dev_src.convertTo(dev_dst, depth2); - - dev_dst.download(dst); - - EXPECT_MAT_NEAR(dst_gold, dst, 0.0); -} - -TEST_P(ConvertTo, WithScaling) -{ - if ((depth1 == CV_64F || depth2 == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) - return; - - cv::RNG& rng = TS::ptr()->get_rng(); - - const double a = rng.uniform(0.0, 1.0); - const double b = rng.uniform(-10.0, 10.0); - - cv::Mat dst_gold; - src.convertTo(dst_gold, depth2, a, b); - - cv::Mat dst; - - cv::gpu::GpuMat dev_src = loadMat(src, useRoi); - - cv::gpu::GpuMat dev_dst; - - dev_src.convertTo(dev_dst, depth2, a, b); - - dev_dst.download(dst); - - const double eps = depth2 < CV_32F ? 
1 : 1e-4; - - EXPECT_MAT_NEAR(dst_gold, dst, eps); -} - -INSTANTIATE_TEST_CASE_P(MatOp, ConvertTo, Combine( - ALL_DEVICES, - TYPES(CV_8U, CV_64F, 1, 1), - TYPES(CV_8U, CV_64F, 1, 1), - WHOLE_SUBMAT)); - -//////////////////////////////////////////////////////////////////////////////// -// async - -struct Async : TestWithParam -{ - cv::gpu::DeviceInfo devInfo; - - cv::gpu::CudaMem src; - - cv::Mat dst_gold0; - cv::Mat dst_gold1; - - virtual void SetUp() - { - devInfo = GetParam(); - - cv::gpu::setDevice(devInfo.deviceID()); - - cv::RNG& rng = TS::ptr()->get_rng(); - - int rows = rng.uniform(100, 200); - int cols = rng.uniform(100, 200); - - src = cv::gpu::CudaMem(cv::Mat::zeros(rows, cols, CV_8UC1)); - - dst_gold0 = cv::Mat(rows, cols, CV_8UC1, cv::Scalar::all(255)); - dst_gold1 = cv::Mat(rows, cols, CV_8UC1, cv::Scalar::all(128)); - } -}; - -TEST_P(Async, Accuracy) -{ - cv::Mat dst0, dst1; - - cv::gpu::CudaMem cpudst0; - cv::gpu::CudaMem cpudst1; - - cv::gpu::GpuMat gpusrc; - cv::gpu::GpuMat gpudst0; - cv::gpu::GpuMat gpudst1(src.rows, src.cols, CV_8UC1); - - cv::gpu::Stream stream0; - cv::gpu::Stream stream1; - - stream0.enqueueUpload(src, gpusrc); - cv::gpu::bitwise_not(gpusrc, gpudst0, cv::gpu::GpuMat(), stream0); - stream0.enqueueDownload(gpudst0, cpudst0); - - stream1.enqueueMemSet(gpudst1, cv::Scalar::all(128)); - stream1.enqueueDownload(gpudst1, cpudst1); - - stream0.waitForCompletion(); - stream1.waitForCompletion(); - - dst0 = cpudst0.createMatHeader(); - dst1 = cpudst1.createMatHeader(); - - EXPECT_MAT_NEAR(dst_gold0, dst0, 0.0); - EXPECT_MAT_NEAR(dst_gold1, dst1, 0.0); -} - -INSTANTIATE_TEST_CASE_P(MatOp, Async, ALL_DEVICES); - -#endif // HAVE_CUDA diff --git a/modules/gpu/test/test_objdetect.cpp b/modules/gpu/test/test_objdetect.cpp new file mode 100644 index 000000000..e8284a2a1 --- /dev/null +++ b/modules/gpu/test/test_objdetect.cpp @@ -0,0 +1,287 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// Intel License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of Intel Corporation may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+namespace {
+
+//#define DUMP
+
+struct HOG : testing::TestWithParam<cv::gpu::DeviceInfo>, cv::gpu::HOGDescriptor
+{
+    cv::gpu::DeviceInfo devInfo;
+
+#ifdef DUMP
+    std::ofstream f;
+#else
+    std::ifstream f;
+#endif
+
+    int wins_per_img_x;
+    int wins_per_img_y;
+    int blocks_per_win_x;
+    int blocks_per_win_y;
+    int block_hist_size;
+
+    virtual void SetUp()
+    {
+        devInfo = GetParam();
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+
+#ifdef DUMP
+    void dump(const cv::Mat& block_hists, const std::vector<cv::Point>& locations)
+    {
+        f.write((char*)&block_hists.rows, sizeof(block_hists.rows));
+        f.write((char*)&block_hists.cols, sizeof(block_hists.cols));
+
+        for (int i = 0; i < block_hists.rows; ++i)
+        {
+            for (int j = 0; j < block_hists.cols; ++j)
+            {
+                float val = block_hists.at<float>(i, j);
+                f.write((char*)&val, sizeof(val));
+            }
+        }
+
+        int nlocations = static_cast<int>(locations.size());
+        f.write((char*)&nlocations, sizeof(nlocations));
+
+        for (int i = 0; i < nlocations; ++i)
+            f.write((char*)&locations[i], sizeof(locations[i]));
+    }
+#else
+    void compare(const cv::Mat& block_hists, const std::vector<cv::Point>& locations)
+    {
+        int rows, cols;
+        f.read((char*)&rows, sizeof(rows));
+        f.read((char*)&cols, sizeof(cols));
+        ASSERT_EQ(rows, block_hists.rows);
+        ASSERT_EQ(cols, block_hists.cols);
+
+        for (int i = 0; i < block_hists.rows; ++i)
+        {
+            for (int j = 0; j < block_hists.cols; ++j)
+            {
+                float val;
+                f.read((char*)&val, sizeof(val));
+                ASSERT_NEAR(val, block_hists.at<float>(i, j), 1e-3);
+            }
+        }
+
+        int nlocations;
+        f.read((char*)&nlocations, sizeof(nlocations));
+        ASSERT_EQ(nlocations, static_cast<int>(locations.size()));
+
+        for (int i = 0; i < nlocations; ++i)
+        {
+            cv::Point location;
+            f.read((char*)&location, sizeof(location));
+            ASSERT_EQ(location, locations[i]);
+        }
+    }
+#endif
+
+    void testDetect(const cv::Mat& img)
+    {
+        gamma_correction = false;
+        setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
+
+        std::vector<cv::Point> locations;
+
+        // Test detect
+        detect(loadMat(img), locations, 0);
+
+#ifdef DUMP
+        dump(cv::Mat(block_hists), locations);
+#else
+        compare(cv::Mat(block_hists), locations);
+#endif
+
+        // Test detect on smaller image
+        cv::Mat img2;
+        cv::resize(img, img2, cv::Size(img.cols / 2, img.rows / 2));
+        detect(loadMat(img2), locations, 0);
+
+#ifdef DUMP
+        dump(cv::Mat(block_hists), locations);
+#else
+        compare(cv::Mat(block_hists), locations);
+#endif
+
+        // Test detect on greater image
+        cv::resize(img, img2, cv::Size(img.cols * 2, img.rows * 2));
+        detect(loadMat(img2), locations, 0);
+
+#ifdef DUMP
+        dump(cv::Mat(block_hists), locations);
+#else
+        compare(cv::Mat(block_hists), locations);
+#endif
+    }
+
+    // Does not compare border values, as interpolation leads to deltas there
+    void compare_inner_parts(cv::Mat d1, cv::Mat d2)
+    {
+        for (int i = 1; i < blocks_per_win_y - 1; ++i)
+            for (int j = 1; j < blocks_per_win_x - 1; ++j)
+                for (int k = 0; k < block_hist_size; ++k)
+                {
+                    float a = d1.at<float>(0, (i * blocks_per_win_x + j) * block_hist_size + k);
+                    float b = d2.at<float>(0, (i * blocks_per_win_x + j) * block_hist_size + k);
+                    ASSERT_FLOAT_EQ(a, b);
+                }
+    }
+};
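The fixture above implements a golden-file scheme: built with DUMP defined, dump() appends the block histograms and detected locations of each run to hog/expected_output.bin; in normal builds compare() replays that file and asserts equality. The on-disk record layout those two functions imply, written out as an illustrative standalone reader (not part of the test suite):

```cpp
// Illustration only: one record of hog/expected_output.bin as implied by
// dump()/compare() above. Each testDetect() call appends one record, so the
// file is a plain sequence of them.
#include <fstream>
#include <vector>
#include <opencv2/core/core.hpp>

bool readRecord(std::ifstream& f, cv::Mat& hists, std::vector<cv::Point>& locations)
{
    int rows, cols, nlocations;
    if (!f.read((char*)&rows, sizeof(rows)) || !f.read((char*)&cols, sizeof(cols)))
        return false;                                   // end of file
    hists.create(rows, cols, CV_32F);                   // row-major CV_32F block
    f.read((char*)hists.ptr<float>(), rows * cols * sizeof(float));
    f.read((char*)&nlocations, sizeof(nlocations));
    locations.resize(nlocations);
    if (nlocations > 0)                                 // raw cv::Point pairs (x, y)
        f.read((char*)&locations[0], nlocations * sizeof(cv::Point));
    return f.good();
}
```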
+
+TEST_P(HOG, Detect)
+{
+    cv::Mat img_rgb = readImage("hog/road.png");
+    ASSERT_FALSE(img_rgb.empty());
+
+#ifdef DUMP
+    f.open((std::string(cvtest::TS::ptr()->get_data_path()) + "hog/expected_output.bin").c_str(), std::ios_base::binary);
+    ASSERT_TRUE(f.is_open());
+#else
+    f.open((std::string(cvtest::TS::ptr()->get_data_path()) + "hog/expected_output.bin").c_str(), std::ios_base::binary);
+    ASSERT_TRUE(f.is_open());
+#endif
+
+    // Test on color image
+    cv::Mat img;
+    cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
+    testDetect(img);
+
+    // Test on gray image
+    cv::cvtColor(img_rgb, img, CV_BGR2GRAY);
+    testDetect(img);
+
+    f.close();
+}
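The GetDescriptors test below asserts that the two descriptor formats hold the same per-block histograms in different orders: DESCR_FORMAT_ROW_BY_ROW walks the window's blocks in raster order, DESCR_FORMAT_COL_BY_COL walks them column-major. The index arithmetic it checks, spelled out as a worked sketch (helper names are ours, not test code):

```cpp
// Offset of the block_hist_size-bin histogram of block (x, y) within one
// window's descriptor, for each of the two layouts compared below.
inline int rowMajorOffset(int x, int y, int blocks_per_win_x, int block_hist_size)
{
    return (y * blocks_per_win_x + x) * block_hist_size; // blocks walked row by row
}

inline int colMajorOffset(int x, int y, int blocks_per_win_y, int block_hist_size)
{
    return (x * blocks_per_win_y + y) * block_hist_size; // blocks walked column by column
}
```

With the 7x15 blocks of 36 bins used in the test, block (x=1, y=0) starts at offset 36 in row-major order but at 15 * 36 = 540 in column-major order.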
+
+TEST_P(HOG, GetDescriptors)
+{
+    // Load image (e.g. train data, composed from windows)
+    cv::Mat img_rgb = readImage("hog/train_data.png");
+    ASSERT_FALSE(img_rgb.empty());
+
+    // Convert to C4
+    cv::Mat img;
+    cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
+
+    cv::gpu::GpuMat d_img(img);
+
+    // Convert train images into feature vectors (train table)
+    cv::gpu::GpuMat descriptors, descriptors_by_cols;
+    getDescriptors(d_img, win_size, descriptors, DESCR_FORMAT_ROW_BY_ROW);
+    getDescriptors(d_img, win_size, descriptors_by_cols, DESCR_FORMAT_COL_BY_COL);
+
+    // Check size of the result train table
+    wins_per_img_x = 3;
+    wins_per_img_y = 2;
+    blocks_per_win_x = 7;
+    blocks_per_win_y = 15;
+    block_hist_size = 36;
+    cv::Size descr_size_expected = cv::Size(blocks_per_win_x * blocks_per_win_y * block_hist_size,
+                                            wins_per_img_x * wins_per_img_y);
+    ASSERT_EQ(descr_size_expected, descriptors.size());
+
+    // Check both formats of output descriptors are handled correctly
+    cv::Mat dr(descriptors);
+    cv::Mat dc(descriptors_by_cols);
+    for (int i = 0; i < wins_per_img_x * wins_per_img_y; ++i)
+    {
+        const float* l = dr.rowRange(i, i + 1).ptr<float>();
+        const float* r = dc.rowRange(i, i + 1).ptr<float>();
+        for (int y = 0; y < blocks_per_win_y; ++y)
+            for (int x = 0; x < blocks_per_win_x; ++x)
+                for (int k = 0; k < block_hist_size; ++k)
+                    ASSERT_EQ(l[(y * blocks_per_win_x + x) * block_hist_size + k],
+                              r[(x * blocks_per_win_y + y) * block_hist_size + k]);
+    }
+
+    /* Now we want to extract the same feature vectors, but from single images. NOTE: the results
+       will be different, due to interpolation of border values. Using many small images is slower,
+       so we won't call getDescriptors and will use computeBlockHistograms instead.
+       computeBlockHistograms works well, as can be checked in the gpu_hog sample. */
+
+    img_rgb = readImage("hog/positive1.png");
+    ASSERT_TRUE(!img_rgb.empty());
+    cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
+    computeBlockHistograms(cv::gpu::GpuMat(img));
+    // Everything is fine with interpolation for the top-left subimage
+    ASSERT_EQ(0.0, cv::norm((cv::Mat)block_hists, (cv::Mat)descriptors.rowRange(0, 1)));
+
+    img_rgb = readImage("hog/positive2.png");
+    ASSERT_TRUE(!img_rgb.empty());
+    cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
+    computeBlockHistograms(cv::gpu::GpuMat(img));
+    compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(1, 2)));
+
+    img_rgb = readImage("hog/negative1.png");
+    ASSERT_TRUE(!img_rgb.empty());
+    cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
+    computeBlockHistograms(cv::gpu::GpuMat(img));
+    compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(2, 3)));
+
+    img_rgb = readImage("hog/negative2.png");
+    ASSERT_TRUE(!img_rgb.empty());
+    cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
+    computeBlockHistograms(cv::gpu::GpuMat(img));
+    compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(3, 4)));
+
+    img_rgb = readImage("hog/positive3.png");
+    ASSERT_TRUE(!img_rgb.empty());
+    cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
+    computeBlockHistograms(cv::gpu::GpuMat(img));
+    compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(4, 5)));
+
+    img_rgb = readImage("hog/negative3.png");
+    ASSERT_TRUE(!img_rgb.empty());
+    cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
+    computeBlockHistograms(cv::gpu::GpuMat(img));
+    compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(5, 6)));
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_ObjDetect, HOG, ALL_DEVICES);
+
+} // namespace
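For reference, a minimal sketch of the cv::gpu::HOGDescriptor API these tests exercise; detectPeople is an illustrative helper name, and a compatible CUDA device is assumed to be selected already:

```cpp
#include <vector>
#include <opencv2/gpu/gpu.hpp>
#include <opencv2/imgproc/imgproc.hpp>

std::vector<cv::Rect> detectPeople(const cv::Mat& bgr)
{
    // The GPU HOG path works on 8UC1 or 8UC4 images, hence the BGRA conversion.
    cv::Mat bgra;
    cv::cvtColor(bgr, bgra, CV_BGR2BGRA);

    cv::gpu::HOGDescriptor hog; // default 64x128 people window
    hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());

    std::vector<cv::Rect> found;
    hog.detectMultiScale(cv::gpu::GpuMat(bgra), found);
    return found;
}
```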