diff --git a/modules/core/include/opencv2/core/cuda_devptrs.hpp b/modules/core/include/opencv2/core/cuda_devptrs.hpp index 373ff290b..72a897b48 100644 --- a/modules/core/include/opencv2/core/cuda_devptrs.hpp +++ b/modules/core/include/opencv2/core/cuda_devptrs.hpp @@ -177,6 +177,20 @@ namespace cv //#undef __CV_GPU_DEPR_BEFORE__ //#undef __CV_GPU_DEPR_AFTER__ + namespace device + { + using cv::gpu::PtrSz; + using cv::gpu::PtrStep; + using cv::gpu::PtrStepSz; + + using cv::gpu::PtrStepSzb; + using cv::gpu::PtrStepSzf; + using cv::gpu::PtrStepSzi; + + using cv::gpu::PtrStepb; + using cv::gpu::PtrStepf; + using cv::gpu::PtrStepi; + } } } diff --git a/modules/core/src/gpumat.cpp b/modules/core/src/gpumat.cpp index d1f965a32..8c7a4708d 100644 --- a/modules/core/src/gpumat.cpp +++ b/modules/core/src/gpumat.cpp @@ -315,18 +315,6 @@ void cv::gpu::DeviceInfo::queryMemory(size_t& free_memory, size_t& total_memory) namespace { - template void getCudaAttribute(T *attribute, CUdevice_attribute device_attribute, int device) - { - *attribute = T(); - //CUresult error = CUDA_SUCCESS;// = cuDeviceGetAttribute( attribute, device_attribute, device ); why link erros under ubuntu?? - CUresult error = cuDeviceGetAttribute( attribute, device_attribute, device ); - if( CUDA_SUCCESS == error ) - return; - - printf("Driver API error = %04d\n", error); - cv::gpu::error("driver API error", __FILE__, __LINE__); - } - int convertSMVer2Cores(int major, int minor) { // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM @@ -335,7 +323,7 @@ namespace int Cores; } SMtoCores; - SMtoCores gpuArchCoresPerSM[] = { { 0x10, 8 }, { 0x11, 8 }, { 0x12, 8 }, { 0x13, 8 }, { 0x20, 32 }, { 0x21, 48 }, {0x30, 192}, { -1, -1 } }; + SMtoCores gpuArchCoresPerSM[] = { { 0x10, 8 }, { 0x11, 8 }, { 0x12, 8 }, { 0x13, 8 }, { 0x20, 32 }, { 0x21, 48 }, {0x30, 192}, {0x35, 192}, { -1, -1 } }; int index = 0; while (gpuArchCoresPerSM[index].SM != -1) @@ -344,7 +332,7 @@ namespace return gpuArchCoresPerSM[index].Cores; index++; } - printf("MapSMtoCores undefined SMversion %d.%d!\n", major, minor); + return -1; } } @@ -382,22 +370,13 @@ void cv::gpu::printCudaDeviceInfo(int device) printf(" CUDA Driver Version / Runtime Version %d.%d / %d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100); printf(" CUDA Capability Major/Minor version number: %d.%d\n", prop.major, prop.minor); printf(" Total amount of global memory: %.0f MBytes (%llu bytes)\n", (float)prop.totalGlobalMem/1048576.0f, (unsigned long long) prop.totalGlobalMem); - printf(" (%2d) Multiprocessors x (%2d) CUDA Cores/MP: %d CUDA Cores\n", - prop.multiProcessorCount, convertSMVer2Cores(prop.major, prop.minor), - convertSMVer2Cores(prop.major, prop.minor) * prop.multiProcessorCount); + + int cores = convertSMVer2Cores(prop.major, prop.minor); + if (cores > 0) + printf(" (%2d) Multiprocessors x (%2d) CUDA Cores/MP: %d CUDA Cores\n", prop.multiProcessorCount, cores, cores * prop.multiProcessorCount); + printf(" GPU Clock Speed: %.2f GHz\n", prop.clockRate * 1e-6f); - // This is not available in the CUDA Runtime API, so we make the necessary calls the driver API to support this for output - int memoryClock, memBusWidth, L2CacheSize; - getCudaAttribute( &memoryClock, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, dev ); - getCudaAttribute( &memBusWidth, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, dev ); - getCudaAttribute( &L2CacheSize, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, dev ); - - printf(" Memory Clock rate: %.2f Mhz\n", memoryClock * 1e-3f); - printf(" Memory Bus Width: %d-bit\n", memBusWidth); - if (L2CacheSize) - printf(" L2 Cache Size: %d bytes\n", L2CacheSize); - printf(" Max Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d,%d), 3D=(%d,%d,%d)\n", prop.maxTexture1D, prop.maxTexture2D[0], prop.maxTexture2D[1], prop.maxTexture3D[0], prop.maxTexture3D[1], prop.maxTexture3D[2]); @@ -457,7 +436,12 @@ void cv::gpu::printShortCudaDeviceInfo(int device) const char *arch_str = prop.major < 2 ? " (not Fermi)" : ""; printf("Device %d: \"%s\" %.0fMb", dev, prop.name, (float)prop.totalGlobalMem/1048576.0f); - printf(", sm_%d%d%s, %d cores", prop.major, prop.minor, arch_str, convertSMVer2Cores(prop.major, prop.minor) * prop.multiProcessorCount); + printf(", sm_%d%d%s", prop.major, prop.minor, arch_str); + + int cores = convertSMVer2Cores(prop.major, prop.minor); + if (cores > 0) + printf(", %d cores", cores * prop.multiProcessorCount); + printf(", Driver/Runtime ver.%d.%d/%d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100); } fflush(stdout); diff --git a/modules/gpu/CMakeLists.txt b/modules/gpu/CMakeLists.txt index 580e7dfb9..cf69ba125 100644 --- a/modules/gpu/CMakeLists.txt +++ b/modules/gpu/CMakeLists.txt @@ -52,13 +52,11 @@ if (HAVE_CUDA) set(cuda_link_libs ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY}) if(NOT APPLE) - unset(CUDA_nvcuvid_LIBRARY CACHE) find_cuda_helper_libs(nvcuvid) set(cuda_link_libs ${cuda_link_libs} ${CUDA_nvcuvid_LIBRARY}) endif() if(WIN32) - unset(CUDA_nvcuvenc_LIBRARY CACHE) find_cuda_helper_libs(nvcuvenc) set(cuda_link_libs ${cuda_link_libs} ${CUDA_nvcuvenc_LIBRARY}) endif() diff --git a/modules/gpu/doc/matrix_reductions.rst b/modules/gpu/doc/matrix_reductions.rst index ee2250a39..538267eb7 100644 --- a/modules/gpu/doc/matrix_reductions.rst +++ b/modules/gpu/doc/matrix_reductions.rst @@ -185,7 +185,7 @@ Reduces a matrix to a vector. * **CV_REDUCE_MIN** The output is the minimum (column/row-wise) of all rows/columns of the matrix. :param dtype: When it is negative, the destination vector will have the same type as the source matrix. Otherwise, its type will be ``CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), mtx.channels())`` . - + The function ``reduce`` reduces the matrix to a vector by treating the matrix rows/columns as a set of 1D vectors and performing the specified operation on the vectors until a single row/column is obtained. For example, the function can be used to compute horizontal and vertical projections of a raster image. In case of ``CV_REDUCE_SUM`` and ``CV_REDUCE_AVG`` , the output may have a larger element bit-depth to preserve accuracy. And multi-channel arrays are also supported in these two reduction modes. .. seealso:: :ocv:func:`reduce` diff --git a/modules/gpu/src/opencv2/gpu/device/block.hpp b/modules/gpu/include/opencv2/gpu/device/block.hpp similarity index 100% rename from modules/gpu/src/opencv2/gpu/device/block.hpp rename to modules/gpu/include/opencv2/gpu/device/block.hpp diff --git a/modules/gpu/include/opencv2/gpu/device/common.hpp b/modules/gpu/include/opencv2/gpu/device/common.hpp index 141467fdc..7fb1036e8 100644 --- a/modules/gpu/include/opencv2/gpu/device/common.hpp +++ b/modules/gpu/include/opencv2/gpu/device/common.hpp @@ -85,8 +85,6 @@ static inline void ___cudaSafeCall(cudaError_t err, const char *file, const int cv::gpu::error(cudaGetErrorString(err), file, line, func); } -#ifdef __CUDACC__ - namespace cv { namespace gpu { __host__ __device__ __forceinline__ int divUp(int total, int grain) @@ -96,19 +94,25 @@ namespace cv { namespace gpu namespace device { + using cv::gpu::divUp; + +#ifdef __CUDACC__ typedef unsigned char uchar; typedef unsigned short ushort; typedef signed char schar; - typedef unsigned int uint; + #ifdef _WIN32 + typedef unsigned int uint; + #endif template inline void bindTexture(const textureReference* tex, const PtrStepSz& img) { cudaChannelFormatDesc desc = cudaCreateChannelDesc(); cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) ); } +#endif // __CUDACC__ } }} -#endif // __CUDACC__ + #endif // __OPENCV_GPU_COMMON_HPP__ diff --git a/modules/gpu/include/opencv2/gpu/device/emulation.hpp b/modules/gpu/include/opencv2/gpu/device/emulation.hpp index 074e91127..b6fba230e 100644 --- a/modules/gpu/include/opencv2/gpu/device/emulation.hpp +++ b/modules/gpu/include/opencv2/gpu/device/emulation.hpp @@ -44,7 +44,6 @@ #define OPENCV_GPU_EMULATION_HPP_ #include "warp_reduce.hpp" -#include namespace cv { namespace gpu { namespace device { diff --git a/modules/gpu/include/opencv2/gpu/device/functional.hpp b/modules/gpu/include/opencv2/gpu/device/functional.hpp index c601cf527..6064e8e99 100644 --- a/modules/gpu/include/opencv2/gpu/device/functional.hpp +++ b/modules/gpu/include/opencv2/gpu/device/functional.hpp @@ -302,18 +302,18 @@ namespace cv { namespace gpu { namespace device template <> struct name : binary_function \ { \ __device__ __forceinline__ type operator()(type lhs, type rhs) const {return op(lhs, rhs);} \ - __device__ __forceinline__ name(const name& other):binary_function(){}\ - __device__ __forceinline__ name():binary_function(){}\ + __device__ __forceinline__ name() {}\ + __device__ __forceinline__ name(const name&) {}\ }; template struct maximum : binary_function { __device__ __forceinline__ T operator()(typename TypeTraits::ParameterType lhs, typename TypeTraits::ParameterType rhs) const { - return lhs < rhs ? rhs : lhs; + return max(lhs, rhs); } - __device__ __forceinline__ maximum(const maximum& other):binary_function(){} - __device__ __forceinline__ maximum():binary_function(){} + __device__ __forceinline__ maximum() {} + __device__ __forceinline__ maximum(const maximum&) {} }; OPENCV_GPU_IMPLEMENT_MINMAX(maximum, uchar, ::max) @@ -330,10 +330,10 @@ namespace cv { namespace gpu { namespace device { __device__ __forceinline__ T operator()(typename TypeTraits::ParameterType lhs, typename TypeTraits::ParameterType rhs) const { - return lhs < rhs ? lhs : rhs; + return min(lhs, rhs); } - __device__ __forceinline__ minimum(const minimum& other):binary_function(){} - __device__ __forceinline__ minimum():binary_function(){} + __device__ __forceinline__ minimum() {} + __device__ __forceinline__ minimum(const minimum&) {} }; OPENCV_GPU_IMPLEMENT_MINMAX(minimum, uchar, ::min) @@ -350,6 +350,108 @@ namespace cv { namespace gpu { namespace device // Math functions ///bound========================================= + + template struct abs_func : unary_function + { + __device__ __forceinline__ T operator ()(typename TypeTraits::ParameterType x) const + { + return abs(x); + } + + __device__ __forceinline__ abs_func() {} + __device__ __forceinline__ abs_func(const abs_func&) {} + }; + template <> struct abs_func : unary_function + { + __device__ __forceinline__ unsigned char operator ()(unsigned char x) const + { + return x; + } + + __device__ __forceinline__ abs_func() {} + __device__ __forceinline__ abs_func(const abs_func&) {} + }; + template <> struct abs_func : unary_function + { + __device__ __forceinline__ signed char operator ()(signed char x) const + { + return ::abs((int)x); + } + + __device__ __forceinline__ abs_func() {} + __device__ __forceinline__ abs_func(const abs_func&) {} + }; + template <> struct abs_func : unary_function + { + __device__ __forceinline__ char operator ()(char x) const + { + return ::abs((int)x); + } + + __device__ __forceinline__ abs_func() {} + __device__ __forceinline__ abs_func(const abs_func&) {} + }; + template <> struct abs_func : unary_function + { + __device__ __forceinline__ unsigned short operator ()(unsigned short x) const + { + return x; + } + + __device__ __forceinline__ abs_func() {} + __device__ __forceinline__ abs_func(const abs_func&) {} + }; + template <> struct abs_func : unary_function + { + __device__ __forceinline__ short operator ()(short x) const + { + return ::abs((int)x); + } + + __device__ __forceinline__ abs_func() {} + __device__ __forceinline__ abs_func(const abs_func&) {} + }; + template <> struct abs_func : unary_function + { + __device__ __forceinline__ unsigned int operator ()(unsigned int x) const + { + return x; + } + + __device__ __forceinline__ abs_func() {} + __device__ __forceinline__ abs_func(const abs_func&) {} + }; + template <> struct abs_func : unary_function + { + __device__ __forceinline__ int operator ()(int x) const + { + return ::abs(x); + } + + __device__ __forceinline__ abs_func() {} + __device__ __forceinline__ abs_func(const abs_func&) {} + }; + template <> struct abs_func : unary_function + { + __device__ __forceinline__ float operator ()(float x) const + { + return ::fabsf(x); + } + + __device__ __forceinline__ abs_func() {} + __device__ __forceinline__ abs_func(const abs_func&) {} + }; + template <> struct abs_func : unary_function + { + __device__ __forceinline__ double operator ()(double x) const + { + return ::fabs(x); + } + + __device__ __forceinline__ abs_func() {} + __device__ __forceinline__ abs_func(const abs_func&) {} + }; + #define OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(name, func) \ template struct name ## _func : unary_function \ { \ @@ -357,6 +459,8 @@ namespace cv { namespace gpu { namespace device { \ return func ## f(v); \ } \ + __device__ __forceinline__ name ## _func() {} \ + __device__ __forceinline__ name ## _func(const name ## _func&) {} \ }; \ template <> struct name ## _func : unary_function \ { \ @@ -364,6 +468,8 @@ namespace cv { namespace gpu { namespace device { \ return func(v); \ } \ + __device__ __forceinline__ name ## _func() {} \ + __device__ __forceinline__ name ## _func(const name ## _func&) {} \ }; #define OPENCV_GPU_IMPLEMENT_BIN_FUNCTOR(name, func) \ @@ -382,7 +488,6 @@ namespace cv { namespace gpu { namespace device } \ }; - OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(fabs, ::fabs) OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(sqrt, ::sqrt) OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(exp, ::exp) OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(exp2, ::exp2) diff --git a/modules/gpu/include/opencv2/gpu/device/vec_math.hpp b/modules/gpu/include/opencv2/gpu/device/vec_math.hpp index 0ec790c0b..1c46dc0c3 100644 --- a/modules/gpu/include/opencv2/gpu/device/vec_math.hpp +++ b/modules/gpu/include/opencv2/gpu/device/vec_math.hpp @@ -280,7 +280,7 @@ namespace cv { namespace gpu { namespace device OPENCV_GPU_IMPLEMENT_VEC_UNOP (type, operator ! , logical_not) \ OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, max, maximum) \ OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, min, minimum) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, fabs, fabs_func) \ + OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, abs, abs_func) \ OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, sqrt, sqrt_func) \ OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, exp, exp_func) \ OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, exp2, exp2_func) \ @@ -327,4 +327,4 @@ namespace cv { namespace gpu { namespace device #undef OPENCV_GPU_IMPLEMENT_VEC_INT_OP }}} // namespace cv { namespace gpu { namespace device -#endif // __OPENCV_GPU_VECMATH_HPP__ \ No newline at end of file +#endif // __OPENCV_GPU_VECMATH_HPP__ diff --git a/modules/gpu/src/brute_force_matcher.cpp b/modules/gpu/src/brute_force_matcher.cpp index 0843c7d1a..bc1818886 100644 --- a/modules/gpu/src/brute_force_matcher.cpp +++ b/modules/gpu/src/brute_force_matcher.cpp @@ -268,14 +268,14 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchConvert(const Mat& trainIdx, cons const float* distance_ptr = distance.ptr(); for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx, ++trainIdx_ptr, ++distance_ptr) { - int _trainIdx = *trainIdx_ptr; + int train_idx = *trainIdx_ptr; - if (_trainIdx == -1) + if (train_idx == -1) continue; - float _distance = *distance_ptr; + float distance_local = *distance_ptr; - DMatch m(queryIdx, _trainIdx, 0, _distance); + DMatch m(queryIdx, train_idx, 0, distance_local); matches.push_back(m); } @@ -413,16 +413,16 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchConvert(const Mat& trainIdx, cons const float* distance_ptr = distance.ptr(); for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr) { - int trainIdx = *trainIdx_ptr; + int _trainIdx = *trainIdx_ptr; - if (trainIdx == -1) + if (_trainIdx == -1) continue; - int imgIdx = *imgIdx_ptr; + int _imgIdx = *imgIdx_ptr; - float distance = *distance_ptr; + float _distance = *distance_ptr; - DMatch m(queryIdx, trainIdx, imgIdx, distance); + DMatch m(queryIdx, _trainIdx, _imgIdx, _distance); matches.push_back(m); } @@ -548,13 +548,13 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchConvert(const Mat& trainIdx, c for (int i = 0; i < k; ++i, ++trainIdx_ptr, ++distance_ptr) { - int trainIdx = *trainIdx_ptr; + int _trainIdx = *trainIdx_ptr; - if (trainIdx != -1) + if (_trainIdx != -1) { - float distance = *distance_ptr; + float _distance = *distance_ptr; - DMatch m(queryIdx, trainIdx, 0, distance); + DMatch m(queryIdx, _trainIdx, 0, _distance); curMatches.push_back(m); } @@ -667,15 +667,15 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch2Convert(const Mat& trainIdx, for (int i = 0; i < 2; ++i, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr) { - int trainIdx = *trainIdx_ptr; + int _trainIdx = *trainIdx_ptr; - if (trainIdx != -1) + if (_trainIdx != -1) { - int imgIdx = *imgIdx_ptr; + int _imgIdx = *imgIdx_ptr; - float distance = *distance_ptr; + float _distance = *distance_ptr; - DMatch m(queryIdx, trainIdx, imgIdx, distance); + DMatch m(queryIdx, _trainIdx, _imgIdx, _distance); curMatches.push_back(m); } @@ -852,25 +852,25 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchConvert(const Mat& trainIdx const int* trainIdx_ptr = trainIdx.ptr(queryIdx); const float* distance_ptr = distance.ptr(queryIdx); - const int nMatches = std::min(nMatches_ptr[queryIdx], trainIdx.cols); + const int nMatched = std::min(nMatches_ptr[queryIdx], trainIdx.cols); - if (nMatches == 0) + if (nMatched == 0) { if (!compactResult) matches.push_back(vector()); continue; } - matches.push_back(vector(nMatches)); + matches.push_back(vector(nMatched)); vector& curMatches = matches.back(); - for (int i = 0; i < nMatches; ++i, ++trainIdx_ptr, ++distance_ptr) + for (int i = 0; i < nMatched; ++i, ++trainIdx_ptr, ++distance_ptr) { - int trainIdx = *trainIdx_ptr; + int _trainIdx = *trainIdx_ptr; - float distance = *distance_ptr; + float _distance = *distance_ptr; - DMatch m(queryIdx, trainIdx, 0, distance); + DMatch m(queryIdx, _trainIdx, 0, _distance); curMatches[i] = m; } @@ -990,9 +990,9 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchConvert(const Mat& trainIdx const int* imgIdx_ptr = imgIdx.ptr(queryIdx); const float* distance_ptr = distance.ptr(queryIdx); - const int nMatches = std::min(nMatches_ptr[queryIdx], trainIdx.cols); + const int nMatched = std::min(nMatches_ptr[queryIdx], trainIdx.cols); - if (nMatches == 0) + if (nMatched == 0) { if (!compactResult) matches.push_back(vector()); @@ -1001,9 +1001,9 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchConvert(const Mat& trainIdx matches.push_back(vector()); vector& curMatches = matches.back(); - curMatches.reserve(nMatches); + curMatches.reserve(nMatched); - for (int i = 0; i < nMatches; ++i, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr) + for (int i = 0; i < nMatched; ++i, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr) { int _trainIdx = *trainIdx_ptr; int _imgIdx = *imgIdx_ptr; diff --git a/modules/gpu/src/cascadeclassifier.cpp b/modules/gpu/src/cascadeclassifier.cpp index f2aa1a4b7..ac839ce96 100644 --- a/modules/gpu/src/cascadeclassifier.cpp +++ b/modules/gpu/src/cascadeclassifier.cpp @@ -622,7 +622,7 @@ private: } // copy data structures on gpu - stage_mat.upload(cv::Mat(1, stages.size() * sizeof(Stage), CV_8UC1, (uchar*)&(stages[0]) )); + stage_mat.upload(cv::Mat(1, (int) (stages.size() * sizeof(Stage)), CV_8UC1, (uchar*)&(stages[0]) )); trees_mat.upload(cv::Mat(cl_trees).reshape(1,1)); nodes_mat.upload(cv::Mat(cl_nodes).reshape(1,1)); leaves_mat.upload(cv::Mat(cl_leaves).reshape(1,1)); diff --git a/modules/gpu/src/cuda/ccomponetns.cu b/modules/gpu/src/cuda/ccomponetns.cu index 62e81376a..c094e08c0 100644 --- a/modules/gpu/src/cuda/ccomponetns.cu +++ b/modules/gpu/src/cuda/ccomponetns.cu @@ -497,6 +497,7 @@ namespace cv { namespace gpu { namespace device void labelComponents(const PtrStepSzb& edges, PtrStepSzi comps, int flags, cudaStream_t stream) { + (void) flags; dim3 block(CTA_SIZE_X, CTA_SIZE_Y); dim3 grid(divUp(edges.cols, TILE_COLS), divUp(edges.rows, TILE_ROWS)); @@ -529,4 +530,4 @@ namespace cv { namespace gpu { namespace device } } } } -#endif /* CUDA_DISABLER */ \ No newline at end of file +#endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/gftt.cu b/modules/gpu/src/cuda/gftt.cu index 4c21bd911..cae217e39 100644 --- a/modules/gpu/src/cuda/gftt.cu +++ b/modules/gpu/src/cuda/gftt.cu @@ -47,6 +47,7 @@ #if !defined CUDA_DISABLER +#include #include #include "opencv2/gpu/device/common.hpp" @@ -148,4 +149,4 @@ namespace cv { namespace gpu { namespace device }}} -#endif /* CUDA_DISABLER */ \ No newline at end of file +#endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/hough.cu b/modules/gpu/src/cuda/hough.cu index ee4d02591..8e35aa858 100644 --- a/modules/gpu/src/cuda/hough.cu +++ b/modules/gpu/src/cuda/hough.cu @@ -42,7 +42,9 @@ #if !defined CUDA_DISABLER +#include #include + #include "opencv2/gpu/device/common.hpp" #include "opencv2/gpu/device/emulation.hpp" #include "opencv2/gpu/device/vec_math.hpp" @@ -1509,4 +1511,4 @@ namespace cv { namespace gpu { namespace device }}} -#endif /* CUDA_DISABLER */ \ No newline at end of file +#endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/lbp.cu b/modules/gpu/src/cuda/lbp.cu index 55f5d7512..a09aa1e79 100644 --- a/modules/gpu/src/cuda/lbp.cu +++ b/modules/gpu/src/cuda/lbp.cu @@ -295,7 +295,7 @@ namespace cv { namespace gpu { namespace device int grid = divUp(workAmount, block); cudaFuncSetCacheConfig(lbp_cascade, cudaFuncCachePreferL1); Cascade cascade((Stage*)mstages.ptr(), nstages, (ClNode*)mnodes.ptr(), mleaves.ptr(), msubsets.ptr(), (uchar4*)mfeatures.ptr(), subsetSize); - lbp_cascade<<>>(cascade, frameW, frameH, windowW, windowH, initialScale, factor, workAmount, integral.ptr(), integral.step / sizeof(int), objects, classified); + lbp_cascade<<>>(cascade, frameW, frameH, windowW, windowH, initialScale, factor, workAmount, integral.ptr(), (int)integral.step / sizeof(int), objects, classified); } } }}} diff --git a/modules/gpu/src/cuda/mathfunc.cu b/modules/gpu/src/cuda/mathfunc.cu index 516ea37a8..3ae8fdc7b 100644 --- a/modules/gpu/src/cuda/mathfunc.cu +++ b/modules/gpu/src/cuda/mathfunc.cu @@ -76,7 +76,7 @@ namespace cv { namespace gpu { namespace device static __device__ __forceinline__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float scale) { float angle = ::atan2f(y_data, x_data); - angle += (angle < 0) * 2.0 * CV_PI; + angle += (angle < 0) * 2.0f * CV_PI_F; dst[y * dst_step + x] = scale * angle; } }; @@ -140,7 +140,7 @@ namespace cv { namespace gpu { namespace device grid.x = divUp(x.cols, threads.x); grid.y = divUp(x.rows, threads.y); - const float scale = angleInDegrees ? (float)(180.0f / CV_PI) : 1.f; + const float scale = angleInDegrees ? (180.0f / CV_PI_F) : 1.f; cartToPolar<<>>( x.data, x.step/x.elemSize(), y.data, y.step/y.elemSize(), @@ -190,7 +190,7 @@ namespace cv { namespace gpu { namespace device grid.x = divUp(mag.cols, threads.x); grid.y = divUp(mag.rows, threads.y); - const float scale = angleInDegrees ? (float)(CV_PI / 180.0f) : 1.0f; + const float scale = angleInDegrees ? (CV_PI_F / 180.0f) : 1.0f; polarToCart<<>>(mag.data, mag.step/mag.elemSize(), angle.data, angle.step/angle.elemSize(), scale, x.data, x.step/x.elemSize(), y.data, y.step/y.elemSize(), mag.cols, mag.rows); @@ -214,4 +214,4 @@ namespace cv { namespace gpu { namespace device } // namespace mathfunc }}} // namespace cv { namespace gpu { namespace device -#endif /* CUDA_DISABLER */ \ No newline at end of file +#endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/optical_flow.cu b/modules/gpu/src/cuda/optical_flow.cu index 0c8d140f9..d2c68a79f 100644 --- a/modules/gpu/src/cuda/optical_flow.cu +++ b/modules/gpu/src/cuda/optical_flow.cu @@ -164,40 +164,40 @@ namespace cv { namespace gpu { namespace device r = ::fmin(r, 2.5f); - v[1].x = arrow_x + r * ::cosf(theta - CV_PI / 2.0f); - v[1].y = arrow_y + r * ::sinf(theta - CV_PI / 2.0f); + v[1].x = arrow_x + r * ::cosf(theta - CV_PI_F / 2.0f); + v[1].y = arrow_y + r * ::sinf(theta - CV_PI_F / 2.0f); - v[4].x = arrow_x + r * ::cosf(theta + CV_PI / 2.0f); - v[4].y = arrow_y + r * ::sinf(theta + CV_PI / 2.0f); + v[4].x = arrow_x + r * ::cosf(theta + CV_PI_F / 2.0f); + v[4].y = arrow_y + r * ::sinf(theta + CV_PI_F / 2.0f); int indx = (y * u_avg.cols + x) * NUM_VERTS_PER_ARROW * 3; - color_data[indx] = (theta - CV_PI) / CV_PI * 180.0f; + color_data[indx] = (theta - CV_PI_F) / CV_PI_F * 180.0f; vertex_data[indx++] = v[0].x * xscale; vertex_data[indx++] = v[0].y * yscale; vertex_data[indx++] = v[0].z; - color_data[indx] = (theta - CV_PI) / CV_PI * 180.0f; + color_data[indx] = (theta - CV_PI_F) / CV_PI_F * 180.0f; vertex_data[indx++] = v[1].x * xscale; vertex_data[indx++] = v[1].y * yscale; vertex_data[indx++] = v[1].z; - color_data[indx] = (theta - CV_PI) / CV_PI * 180.0f; + color_data[indx] = (theta - CV_PI_F) / CV_PI_F * 180.0f; vertex_data[indx++] = v[2].x * xscale; vertex_data[indx++] = v[2].y * yscale; vertex_data[indx++] = v[2].z; - color_data[indx] = (theta - CV_PI) / CV_PI * 180.0f; + color_data[indx] = (theta - CV_PI_F) / CV_PI_F * 180.0f; vertex_data[indx++] = v[3].x * xscale; vertex_data[indx++] = v[3].y * yscale; vertex_data[indx++] = v[3].z; - color_data[indx] = (theta - CV_PI) / CV_PI * 180.0f; + color_data[indx] = (theta - CV_PI_F) / CV_PI_F * 180.0f; vertex_data[indx++] = v[4].x * xscale; vertex_data[indx++] = v[4].y * yscale; vertex_data[indx++] = v[4].z; - color_data[indx] = (theta - CV_PI) / CV_PI * 180.0f; + color_data[indx] = (theta - CV_PI_F) / CV_PI_F * 180.0f; vertex_data[indx++] = v[5].x * xscale; vertex_data[indx++] = v[5].y * yscale; vertex_data[indx++] = v[5].z; @@ -217,4 +217,4 @@ namespace cv { namespace gpu { namespace device } }}} -#endif /* CUDA_DISABLER */ \ No newline at end of file +#endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/optical_flow_farneback.cu b/modules/gpu/src/cuda/optical_flow_farneback.cu index 8231775b8..5bbca34f1 100644 --- a/modules/gpu/src/cuda/optical_flow_farneback.cu +++ b/modules/gpu/src/cuda/optical_flow_farneback.cu @@ -42,7 +42,6 @@ #if !defined CUDA_DISABLER -#include #include "internal_shared.hpp" #include "opencv2/gpu/device/common.hpp" #include "opencv2/gpu/device/border_interpolate.hpp" @@ -57,8 +56,6 @@ #define BORDER_SIZE 5 #define MAX_KSIZE_HALF 100 -using namespace std; - namespace cv { namespace gpu { namespace device { namespace optflow_farneback { __constant__ float c_g[8]; diff --git a/modules/gpu/src/cuda/pyrlk.cu b/modules/gpu/src/cuda/pyrlk.cu index d1a65c210..811c3b90b 100644 --- a/modules/gpu/src/cuda/pyrlk.cu +++ b/modules/gpu/src/cuda/pyrlk.cu @@ -267,7 +267,7 @@ namespace cv { namespace gpu { namespace device } __device__ __forceinline__ float4 abs_(const float4& a) { - return fabs(a); + return abs(a); } template @@ -681,4 +681,4 @@ namespace cv { namespace gpu { namespace device } }}} -#endif /* CUDA_DISABLER */ \ No newline at end of file +#endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/split_merge.cu b/modules/gpu/src/cuda/split_merge.cu index a62c76bbe..834b283f0 100644 --- a/modules/gpu/src/cuda/split_merge.cu +++ b/modules/gpu/src/cuda/split_merge.cu @@ -508,4 +508,4 @@ namespace cv { namespace gpu { namespace device }}} // namespace cv { namespace gpu { namespace device -#endif /* CUDA_DISABLER */ \ No newline at end of file +#endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/stereobp.cu b/modules/gpu/src/cuda/stereobp.cu index 736aa3f1f..18d3ae797 100644 --- a/modules/gpu/src/cuda/stereobp.cu +++ b/modules/gpu/src/cuda/stereobp.cu @@ -454,7 +454,7 @@ namespace cv { namespace gpu { namespace device grid.x = divUp(cols, threads.x << 1); grid.y = divUp(rows, threads.y); - int elem_step = u.step/sizeof(T); + int elem_step = (int)(u.step / sizeof(T)); for(int t = 0; t < iters; ++t) { diff --git a/modules/gpu/src/cuda/surf.cu b/modules/gpu/src/cuda/surf.cu index 37c4eb48a..b37f47f09 100644 --- a/modules/gpu/src/cuda/surf.cu +++ b/modules/gpu/src/cuda/surf.cu @@ -638,7 +638,7 @@ namespace cv { namespace gpu { namespace device kp_dir *= 180.0f / CV_PI_F; kp_dir = 360.0f - kp_dir; - if (abs(kp_dir - 360.f) < FLT_EPSILON) + if (::fabsf(kp_dir - 360.f) < FLT_EPSILON) kp_dir = 0.f; featureDir[blockIdx.x] = kp_dir; @@ -1003,4 +1003,4 @@ namespace cv { namespace gpu { namespace device }}} // namespace cv { namespace gpu { namespace device -#endif /* CUDA_DISABLER */ \ No newline at end of file +#endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/texture_binder.hpp b/modules/gpu/src/cuda/texture_binder.hpp index 4f42b099d..391eb9a19 100644 --- a/modules/gpu/src/cuda/texture_binder.hpp +++ b/modules/gpu/src/cuda/texture_binder.hpp @@ -85,7 +85,7 @@ namespace cv namespace device { - using pcl::gpu::TextureBinder; + using cv::gpu::TextureBinder; } } diff --git a/modules/gpu/src/fast.cpp b/modules/gpu/src/fast.cpp index d09210da4..f8b3b9887 100644 --- a/modules/gpu/src/fast.cpp +++ b/modules/gpu/src/fast.cpp @@ -125,9 +125,6 @@ int cv::gpu::FAST_GPU::calcKeyPointsLocation(const GpuMat& img, const GpuMat& ma CV_Assert(img.type() == CV_8UC1); CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == img.size())); - if (!TargetArchs::builtWith(GLOBAL_ATOMICS) || !DeviceInfo().supports(GLOBAL_ATOMICS)) - CV_Error(CV_StsNotImplemented, "The device doesn't support global atomics"); - int maxKeypoints = static_cast(keypointsRatio * img.size().area()); ensureSizeIsEnough(1, maxKeypoints, CV_16SC2, kpLoc_); @@ -148,9 +145,6 @@ int cv::gpu::FAST_GPU::getKeyPoints(GpuMat& keypoints) { using namespace cv::gpu::device::fast; - if (!TargetArchs::builtWith(GLOBAL_ATOMICS) || !DeviceInfo().supports(GLOBAL_ATOMICS)) - CV_Error(CV_StsNotImplemented, "The device doesn't support global atomics"); - if (count_ == 0) return 0; diff --git a/modules/gpu/src/gftt.cpp b/modules/gpu/src/gftt.cpp index 0c8f165ae..6bb73de75 100644 --- a/modules/gpu/src/gftt.cpp +++ b/modules/gpu/src/gftt.cpp @@ -68,9 +68,6 @@ void cv::gpu::GoodFeaturesToTrackDetector_GPU::operator ()(const GpuMat& image, CV_Assert(qualityLevel > 0 && minDistance >= 0 && maxCorners >= 0); CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size())); - if (!TargetArchs::builtWith(GLOBAL_ATOMICS) || !DeviceInfo().supports(GLOBAL_ATOMICS)) - CV_Error(CV_StsNotImplemented, "The device doesn't support global atomics"); - ensureSizeIsEnough(image.size(), CV_32F, eig_); if (useHarrisDetector) diff --git a/modules/gpu/src/nvidia/core/NCV.cu b/modules/gpu/src/nvidia/core/NCV.cu index 5d1b5d12c..77e59cc5c 100644 --- a/modules/gpu/src/nvidia/core/NCV.cu +++ b/modules/gpu/src/nvidia/core/NCV.cu @@ -45,8 +45,6 @@ #include #include "NCV.hpp" -using namespace std; - //============================================================================== // @@ -55,16 +53,16 @@ using namespace std; //============================================================================== -static void stdDebugOutput(const string &msg) +static void stdDebugOutput(const std::string &msg) { - cout << msg; + std::cout << msg; } static NCVDebugOutputHandler *debugOutputHandler = stdDebugOutput; -void ncvDebugOutput(const string &msg) +void ncvDebugOutput(const std::string &msg) { debugOutputHandler(msg); } diff --git a/modules/gpu/src/nvidia/core/NCV.hpp b/modules/gpu/src/nvidia/core/NCV.hpp index ddac47c92..703cb827b 100644 --- a/modules/gpu/src/nvidia/core/NCV.hpp +++ b/modules/gpu/src/nvidia/core/NCV.hpp @@ -288,7 +288,7 @@ NCV_EXPORTS void ncvSetDebugOutputHandler(NCVDebugOutputHandler* func); do \ { \ cudaError_t res = cudacall; \ - ncvAssertPrintReturn(cudaSuccess==res, "cudaError_t=" << res, errCode); \ + ncvAssertPrintReturn(cudaSuccess==res, "cudaError_t=" << (int)res, errCode); \ } while (0) @@ -296,7 +296,7 @@ NCV_EXPORTS void ncvSetDebugOutputHandler(NCVDebugOutputHandler* func); do \ { \ cudaError_t res = cudaGetLastError(); \ - ncvAssertPrintReturn(cudaSuccess==res, "cudaError_t=" << res, errCode); \ + ncvAssertPrintReturn(cudaSuccess==res, "cudaError_t=" << (int)res, errCode); \ } while (0) diff --git a/samples/gpu/cascadeclassifier_nvidia_api.cpp b/samples/gpu/cascadeclassifier_nvidia_api.cpp index da98643af..99c95ab97 100644 --- a/samples/gpu/cascadeclassifier_nvidia_api.cpp +++ b/samples/gpu/cascadeclassifier_nvidia_api.cpp @@ -30,7 +30,7 @@ const Size2i preferredVideoFrameSize(640, 480); const string wndTitle = "NVIDIA Computer Vision :: Haar Classifiers Cascade"; -void matPrint(Mat &img, int lineOffsY, Scalar fontColor, const string &ss) +static void matPrint(Mat &img, int lineOffsY, Scalar fontColor, const string &ss) { int fontFace = FONT_HERSHEY_DUPLEX; double fontScale = 0.8; @@ -45,7 +45,7 @@ void matPrint(Mat &img, int lineOffsY, Scalar fontColor, const string &ss) } -void displayState(Mat &canvas, bool bHelp, bool bGpu, bool bLargestFace, bool bFilter, double fps) +static void displayState(Mat &canvas, bool bHelp, bool bGpu, bool bLargestFace, bool bFilter, double fps) { Scalar fontColorRed = CV_RGB(255,0,0); Scalar fontColorNV = CV_RGB(118,185,0); @@ -74,7 +74,7 @@ void displayState(Mat &canvas, bool bHelp, bool bGpu, bool bLargestFace, bool bF } -NCVStatus process(Mat *srcdst, +static NCVStatus process(Mat *srcdst, Ncv32u width, Ncv32u height, NcvBool bFilterRects, NcvBool bLargestFace, HaarClassifierCascadeDescriptor &haar, @@ -281,7 +281,7 @@ int main(int argc, const char** argv) //============================================================================== namedWindow(wndTitle, 1); - Mat gray, frameDisp; + Mat frameDisp; do { diff --git a/samples/gpu/driver_api_multi.cpp b/samples/gpu/driver_api_multi.cpp index 560908ce6..2d743f0e9 100644 --- a/samples/gpu/driver_api_multi.cpp +++ b/samples/gpu/driver_api_multi.cpp @@ -54,14 +54,8 @@ inline void safeCall_(int code, const char* expr, const char* file, int line) // Each GPU is associated with its own context CUcontext contexts[2]; -int main(int argc, char **argv) +int main() { - if (argc > 1) - { - cout << "CUDA driver API sample\n"; - return -1; - } - int num_devices = getCudaEnabledDeviceCount(); if (num_devices < 2) { diff --git a/samples/gpu/driver_api_stereo_multi.cpp b/samples/gpu/driver_api_stereo_multi.cpp index 3c663a5e5..b8f99e810 100644 --- a/samples/gpu/driver_api_stereo_multi.cpp +++ b/samples/gpu/driver_api_stereo_multi.cpp @@ -76,7 +76,7 @@ GpuMat d_result[2]; // CPU result Mat result; -void printHelp() +static void printHelp() { std::cout << "Usage: driver_api_stereo_multi_gpu --left --right \n"; } diff --git a/samples/gpu/opticalflow_nvidia_api.cpp b/samples/gpu/opticalflow_nvidia_api.cpp index 8a149d740..05a37ef69 100644 --- a/samples/gpu/opticalflow_nvidia_api.cpp +++ b/samples/gpu/opticalflow_nvidia_api.cpp @@ -59,7 +59,7 @@ public: class RgbToR { public: - float operator ()(unsigned char b, unsigned char g, unsigned char r) + float operator ()(unsigned char /*b*/, unsigned char /*g*/, unsigned char r) { return static_cast(r)/255.0f; } @@ -69,7 +69,7 @@ public: class RgbToG { public: - float operator ()(unsigned char b, unsigned char g, unsigned char r) + float operator ()(unsigned char /*b*/, unsigned char g, unsigned char /*r*/) { return static_cast(g)/255.0f; } @@ -78,7 +78,7 @@ public: class RgbToB { public: - float operator ()(unsigned char b, unsigned char g, unsigned char r) + float operator ()(unsigned char b, unsigned char /*g*/, unsigned char /*r*/) { return static_cast(b)/255.0f; } @@ -135,7 +135,7 @@ NCVStatus CopyData(const IplImage *image, const NCVMatrixAlloc &dst) return NCV_SUCCESS; } -NCVStatus LoadImages (const char *frame0Name, +static NCVStatus LoadImages (const char *frame0Name, const char *frame1Name, int &width, int &height, @@ -186,7 +186,7 @@ inline T MapValue (T x, T a, T b, T c, T d) return c + (d - c) * (x - a) / (b - a); } -NCVStatus ShowFlow (NCVMatrixAlloc &u, NCVMatrixAlloc &v, const char *name) +static NCVStatus ShowFlow (NCVMatrixAlloc &u, NCVMatrixAlloc &v, const char *name) { IplImage *flowField; @@ -246,7 +246,7 @@ NCVStatus ShowFlow (NCVMatrixAlloc &u, NCVMatrixAlloc &v, const return NCV_SUCCESS; } -IplImage *CreateImage (NCVMatrixAlloc &h_r, NCVMatrixAlloc &h_g, NCVMatrixAlloc &h_b) +static IplImage *CreateImage (NCVMatrixAlloc &h_r, NCVMatrixAlloc &h_g, NCVMatrixAlloc &h_b) { CvSize imageSize = cvSize (h_r.width (), h_r.height ()); IplImage *image = cvCreateImage (imageSize, IPL_DEPTH_8U, 4); @@ -270,7 +270,7 @@ IplImage *CreateImage (NCVMatrixAlloc &h_r, NCVMatrixAlloc &h_g, return image; } -void PrintHelp () +static void PrintHelp () { std::cout << "Usage help:\n"; std::cout << std::setiosflags(std::ios::left); @@ -286,7 +286,7 @@ void PrintHelp () std::cout << "\t" << std::setw(15) << PARAM_HELP << " - display this help message\n"; } -int ProcessCommandLine(int argc, char **argv, +static int ProcessCommandLine(int argc, char **argv, Ncv32f &timeStep, char *&frame0Name, char *&frame1Name, diff --git a/samples/gpu/stereo_multi.cpp b/samples/gpu/stereo_multi.cpp index c7fa5539b..d424bf90b 100644 --- a/samples/gpu/stereo_multi.cpp +++ b/samples/gpu/stereo_multi.cpp @@ -47,7 +47,7 @@ GpuMat d_result[2]; // CPU result Mat result; -void printHelp() +static void printHelp() { std::cout << "Usage: stereo_multi_gpu --left --right \n"; }