From da93a1dab9b84a2dc00489c8c97c5697b9b482a1 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Thu, 13 Dec 2012 13:49:32 +0400 Subject: [PATCH] fixed build for CARMA platform --- cmake/OpenCVDetectCUDA.cmake | 23 +- modules/core/CMakeLists.txt | 1 - modules/core/src/gpumat.cpp | 26 +- modules/core/src/opengl_interop.cpp | 51 +-- modules/gpu/app/nv_perf_test/CMakeLists.txt | 2 +- .../gpu/include/opencv2/gpu/device/common.hpp | 1 - modules/gpu/src/cuda/canny.cu | 50 +-- modules/gpu/src/cuda/ccomponetns.cu | 3 +- modules/gpu/src/cuda/column_filter.h | 335 +++++++++--------- modules/gpu/src/cuda/element_operations.cu | 136 +++---- modules/gpu/src/cuda/gftt.cu | 3 +- modules/gpu/src/cuda/global_motion.cu | 13 +- modules/gpu/src/cuda/hist.cu | 13 +- modules/gpu/src/cuda/hog.cu | 16 +- modules/gpu/src/cuda/hough.cu | 2 + modules/gpu/src/cuda/matrix_reductions.cu | 15 +- modules/gpu/src/cuda/optflowbm.cu | 5 +- modules/gpu/src/cuda/orb.cu | 1 + modules/gpu/src/cuda/pyrlk.cu | 37 +- modules/gpu/src/cuda/row_filter.h | 335 +++++++++--------- modules/gpu/src/cuda/split_merge.cu | 2 +- modules/gpu/src/cuda/tvl1flow.cu | 36 +- modules/gpu/src/imgproc.cpp | 2 +- .../nvidia/TestHaarCascadeApplication.cpp | 14 +- samples/gpu/driver_api_multi.cpp | 8 +- samples/gpu/driver_api_stereo_multi.cpp | 2 +- samples/gpu/softcascade.cpp | 5 +- 27 files changed, 543 insertions(+), 594 deletions(-) diff --git a/cmake/OpenCVDetectCUDA.cmake b/cmake/OpenCVDetectCUDA.cmake index c1cd83866..354ffbf99 100644 --- a/cmake/OpenCVDetectCUDA.cmake +++ b/cmake/OpenCVDetectCUDA.cmake @@ -3,12 +3,12 @@ if(${CMAKE_VERSION} VERSION_LESS "2.8.3") return() endif() -if (WIN32 AND NOT MSVC) +if(WIN32 AND NOT MSVC) message(STATUS "CUDA compilation is disabled (due to only Visual Studio compiler suppoted on your platform).") return() endif() -if (CMAKE_COMPILER_IS_GNUCXX AND NOT APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang") +if(CMAKE_COMPILER_IS_GNUCXX AND NOT APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang") message(STATUS "CUDA compilation is disabled (due to Clang unsuppoted on your platform).") return() endif() @@ -72,11 +72,11 @@ if(CUDA_FOUND) # Tell NVCC to add PTX intermediate code for the specified architectures string(REGEX MATCHALL "[0-9]+" ARCH_LIST "${ARCH_PTX_NO_POINTS}") - foreach(ARCH IN LISTS ARCH_LIST) - set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${ARCH},code=compute_${ARCH}) - set(OPENCV_CUDA_ARCH_PTX "${OPENCV_CUDA_ARCH_PTX} ${ARCH}") - set(OPENCV_CUDA_ARCH_FEATURES "${OPENCV_CUDA_ARCH_FEATURES} ${ARCH}") - endforeach() + foreach(ARCH IN LISTS ARCH_LIST) + set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${ARCH},code=compute_${ARCH}) + set(OPENCV_CUDA_ARCH_PTX "${OPENCV_CUDA_ARCH_PTX} ${ARCH}") + set(OPENCV_CUDA_ARCH_FEATURES "${OPENCV_CUDA_ARCH_FEATURES} ${ARCH}") + endforeach() # These vars will be processed in other scripts set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${NVCC_FLAGS_EXTRA}) @@ -84,7 +84,7 @@ if(CUDA_FOUND) message(STATUS "CUDA NVCC target flags: ${CUDA_NVCC_FLAGS}") - OCV_OPTION(CUDA_FAST_MATH "Enable --use_fast_math for CUDA compiler " OFF) + OCV_OPTION(CUDA_FAST_MATH "Enable --use_fast_math for CUDA compiler " OFF) if(CUDA_FAST_MATH) set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --use_fast_math) @@ -92,7 +92,6 @@ if(CUDA_FOUND) mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD CUDA_SDK_ROOT_DIR) - unset(CUDA_npp_LIBRARY CACHE) find_cuda_helper_libs(npp) macro(ocv_cuda_compile VAR) @@ -106,15 +105,15 @@ if(CUDA_FOUND) string(REPLACE "-ggdb3" "" ${var} "${${var}}") endforeach() - if (BUILD_SHARED_LIBS) + if(BUILD_SHARED_LIBS) set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -DCVAPI_EXPORTS) endif() if(UNIX OR APPLE) - set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -fPIC) + set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -fPIC) endif() if(APPLE) - set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -fno-finite-math-only) + set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -fno-finite-math-only) endif() # disabled because of multiple warnings during building nvcc auto generated files diff --git a/modules/core/CMakeLists.txt b/modules/core/CMakeLists.txt index cfa14cdcd..4c5112e3f 100644 --- a/modules/core/CMakeLists.txt +++ b/modules/core/CMakeLists.txt @@ -10,7 +10,6 @@ if(HAVE_CUDA) file(GLOB lib_cuda "src/cuda/*.cu") ocv_cuda_compile(cuda_objs ${lib_cuda}) - set(cuda_link_libs ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY}) else() set(lib_cuda "") diff --git a/modules/core/src/gpumat.cpp b/modules/core/src/gpumat.cpp index 6b5b076cd..49f6de389 100644 --- a/modules/core/src/gpumat.cpp +++ b/modules/core/src/gpumat.cpp @@ -45,8 +45,7 @@ #include #ifdef HAVE_CUDA - #include - #include + #include #include #define CUDART_MINIMUM_REQUIRED_VERSION 4010 @@ -394,18 +393,6 @@ void cv::gpu::DeviceInfo::queryMemory(size_t& free_memory, size_t& total_memory) namespace { - template void getCudaAttribute(T *attribute, CUdevice_attribute device_attribute, int device) - { - *attribute = T(); - //CUresult error = CUDA_SUCCESS;// = cuDeviceGetAttribute( attribute, device_attribute, device ); why link erros under ubuntu?? - CUresult error = cuDeviceGetAttribute( attribute, device_attribute, device ); - if( CUDA_SUCCESS == error ) - return; - - printf("Driver API error = %04d\n", error); - cv::gpu::error("driver API error", __FILE__, __LINE__); - } - int convertSMVer2Cores(int major, int minor) { // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM @@ -466,17 +453,6 @@ void cv::gpu::printCudaDeviceInfo(int device) convertSMVer2Cores(prop.major, prop.minor) * prop.multiProcessorCount); printf(" GPU Clock Speed: %.2f GHz\n", prop.clockRate * 1e-6f); - // This is not available in the CUDA Runtime API, so we make the necessary calls the driver API to support this for output - int memoryClock, memBusWidth, L2CacheSize; - getCudaAttribute( &memoryClock, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, dev ); - getCudaAttribute( &memBusWidth, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, dev ); - getCudaAttribute( &L2CacheSize, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, dev ); - - printf(" Memory Clock rate: %.2f Mhz\n", memoryClock * 1e-3f); - printf(" Memory Bus Width: %d-bit\n", memBusWidth); - if (L2CacheSize) - printf(" L2 Cache Size: %d bytes\n", L2CacheSize); - printf(" Max Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d,%d), 3D=(%d,%d,%d)\n", prop.maxTexture1D, prop.maxTexture2D[0], prop.maxTexture2D[1], prop.maxTexture3D[0], prop.maxTexture3D[1], prop.maxTexture3D[2]); diff --git a/modules/core/src/opengl_interop.cpp b/modules/core/src/opengl_interop.cpp index 86c85589d..befc63f3f 100644 --- a/modules/core/src/opengl_interop.cpp +++ b/modules/core/src/opengl_interop.cpp @@ -44,11 +44,13 @@ #include "opencv2/core/opengl_interop.hpp" #include "opencv2/core/gpumat.hpp" -#include "gl_core_3_1.hpp" +#ifdef HAVE_OPENGL + #include "gl_core_3_1.hpp" -#ifdef HAVE_CUDA - #include - #include + #ifdef HAVE_CUDA + #include + #include + #endif #endif using namespace std; @@ -61,24 +63,24 @@ namespace void throw_nogl() { CV_Error(CV_OpenGlNotSupported, "The library is compiled without OpenGL support"); } #else void throw_nogl() { CV_Error(CV_OpenGlApiCallError, "OpenGL context doesn't exist"); } - #endif - #ifndef HAVE_CUDA - void throw_nocuda() { CV_Error(CV_GpuNotSupported, "The library is compiled without GPU support"); } - #else - void throw_nocuda() { CV_Error(CV_StsNotImplemented, "The called functionality is disabled for current build or platform"); } + #ifndef HAVE_CUDA + void throw_nocuda() { CV_Error(CV_GpuNotSupported, "The library is compiled without GPU support"); } + #else + void throw_nocuda() { CV_Error(CV_StsNotImplemented, "The called functionality is disabled for current build or platform"); } - #if defined(__GNUC__) - #define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, __func__) - #else /* defined(__CUDACC__) || defined(__MSVC__) */ - #define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__) + #if defined(__GNUC__) + #define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, __func__) + #else /* defined(__CUDACC__) || defined(__MSVC__) */ + #define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__) + #endif + + void ___cudaSafeCall(cudaError_t err, const char* file, const int line, const char* func = "") + { + if (cudaSuccess != err) + cv::gpu::error(cudaGetErrorString(err), file, line, func); + } #endif - - void ___cudaSafeCall(cudaError_t err, const char* file, const int line, const char* func = "") - { - if (cudaSuccess != err) - cv::gpu::error(cudaGetErrorString(err), file, line, func); - } #endif } @@ -139,11 +141,16 @@ namespace void cv::gpu::setGlDevice(int device) { -#if !defined(HAVE_CUDA) || defined(CUDA_DISABLER) +#ifndef HAVE_OPENGL (void) device; - throw_nocuda(); + throw_nogl(); #else - cudaSafeCall( cudaGLSetGLDevice(device) ); + #if !defined(HAVE_CUDA) || defined(CUDA_DISABLER) + (void) device; + throw_nocuda(); + #else + cudaSafeCall( cudaGLSetGLDevice(device) ); + #endif #endif } diff --git a/modules/gpu/app/nv_perf_test/CMakeLists.txt b/modules/gpu/app/nv_perf_test/CMakeLists.txt index 0793b4493..c13f5ef46 100644 --- a/modules/gpu/app/nv_perf_test/CMakeLists.txt +++ b/modules/gpu/app/nv_perf_test/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 2.8.6) +cmake_minimum_required(VERSION 2.8.3) project(nv_perf_test) diff --git a/modules/gpu/include/opencv2/gpu/device/common.hpp b/modules/gpu/include/opencv2/gpu/device/common.hpp index ffe2c98b8..e44c8442f 100644 --- a/modules/gpu/include/opencv2/gpu/device/common.hpp +++ b/modules/gpu/include/opencv2/gpu/device/common.hpp @@ -100,7 +100,6 @@ namespace cv { namespace gpu typedef unsigned char uchar; typedef unsigned short ushort; typedef signed char schar; - typedef unsigned int uint; template inline void bindTexture(const textureReference* tex, const PtrStepSz& img) { diff --git a/modules/gpu/src/cuda/canny.cu b/modules/gpu/src/cuda/canny.cu index ae9cac831..0a5daebaa 100644 --- a/modules/gpu/src/cuda/canny.cu +++ b/modules/gpu/src/cuda/canny.cu @@ -52,7 +52,7 @@ using namespace cv::gpu; using namespace cv::gpu::device; -namespace +namespace canny { struct L1 : binary_function { @@ -78,17 +78,17 @@ namespace namespace cv { namespace gpu { namespace device { - template <> struct TransformFunctorTraits : DefaultTransformFunctorTraits + template <> struct TransformFunctorTraits : DefaultTransformFunctorTraits { enum { smart_shift = 4 }; }; - template <> struct TransformFunctorTraits : DefaultTransformFunctorTraits + template <> struct TransformFunctorTraits : DefaultTransformFunctorTraits { enum { smart_shift = 4 }; }; }}} -namespace +namespace canny { texture tex_src(false, cudaFilterModePoint, cudaAddressModeClamp); struct SrcTex @@ -104,7 +104,7 @@ namespace }; template __global__ - void calcMagnitude(const SrcTex src, PtrStepi dx, PtrStepi dy, PtrStepSzf mag, const Norm norm) + void calcMagnitudeKernel(const SrcTex src, PtrStepi dx, PtrStepi dy, PtrStepSzf mag, const Norm norm) { const int x = blockIdx.x * blockDim.x + threadIdx.x; const int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -120,10 +120,7 @@ namespace mag(y, x) = norm(dxVal, dyVal); } -} -namespace canny -{ void calcMagnitude(PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad) { const dim3 block(16, 16); @@ -135,12 +132,12 @@ namespace canny if (L2Grad) { L2 norm; - ::calcMagnitude<<>>(src, dx, dy, mag, norm); + calcMagnitudeKernel<<>>(src, dx, dy, mag, norm); } else { L1 norm; - ::calcMagnitude<<>>(src, dx, dy, mag, norm); + calcMagnitudeKernel<<>>(src, dx, dy, mag, norm); } cudaSafeCall( cudaGetLastError() ); @@ -165,11 +162,11 @@ namespace canny ////////////////////////////////////////////////////////////////////////////////////////// -namespace +namespace canny { texture tex_mag(false, cudaFilterModePoint, cudaAddressModeClamp); - __global__ void calcMap(const PtrStepSzi dx, const PtrStepi dy, PtrStepi map, const float low_thresh, const float high_thresh) + __global__ void calcMapKernel(const PtrStepSzi dx, const PtrStepi dy, PtrStepi map, const float low_thresh, const float high_thresh) { const int CANNY_SHIFT = 15; const int TG22 = (int)(0.4142135623730950488016887242097*(1<>>(dx, dy, map, low_thresh, high_thresh); + calcMapKernel<<>>(dx, dy, map, low_thresh, high_thresh); cudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaDeviceSynchronize() ); @@ -240,11 +234,11 @@ namespace canny ////////////////////////////////////////////////////////////////////////////////////////// -namespace +namespace canny { __device__ int counter = 0; - __global__ void edgesHysteresisLocal(PtrStepSzi map, ushort2* st) + __global__ void edgesHysteresisLocalKernel(PtrStepSzi map, ushort2* st) { __shared__ volatile int smem[18][18]; @@ -325,10 +319,7 @@ namespace st[ind] = make_ushort2(x, y); } } -} -namespace canny -{ void edgesHysteresisLocal(PtrStepSzi map, ushort2* st1) { void* counter_ptr; @@ -339,7 +330,7 @@ namespace canny const dim3 block(16, 16); const dim3 grid(divUp(map.cols, block.x), divUp(map.rows, block.y)); - ::edgesHysteresisLocal<<>>(map, st1); + edgesHysteresisLocalKernel<<>>(map, st1); cudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaDeviceSynchronize() ); @@ -348,12 +339,12 @@ namespace canny ////////////////////////////////////////////////////////////////////////////////////////// -namespace +namespace canny { __constant__ int c_dx[8] = {-1, 0, 1, -1, 1, -1, 0, 1}; __constant__ int c_dy[8] = {-1, -1, -1, 0, 0, 1, 1, 1}; - __global__ void edgesHysteresisGlobal(PtrStepSzi map, ushort2* st1, ushort2* st2, const int count) + __global__ void edgesHysteresisGlobalKernel(PtrStepSzi map, ushort2* st1, ushort2* st2, const int count) { const int stack_size = 512; @@ -439,14 +430,11 @@ namespace st2[ind + i] = s_st[i]; } } -} -namespace canny -{ void edgesHysteresisGlobal(PtrStepSzi map, ushort2* st1, ushort2* st2) { void* counter_ptr; - cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, ::counter) ); + cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, canny::counter) ); int count; cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) ); @@ -458,7 +446,7 @@ namespace canny const dim3 block(128); const dim3 grid(::min(count, 65535u), divUp(count, 65535), 1); - ::edgesHysteresisGlobal<<>>(map, st1, st2, count); + edgesHysteresisGlobalKernel<<>>(map, st1, st2, count); cudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaDeviceSynchronize() ); @@ -472,7 +460,7 @@ namespace canny ////////////////////////////////////////////////////////////////////////////////////////// -namespace +namespace canny { struct GetEdges : unary_function { @@ -488,7 +476,7 @@ namespace namespace cv { namespace gpu { namespace device { - template <> struct TransformFunctorTraits : DefaultTransformFunctorTraits + template <> struct TransformFunctorTraits : DefaultTransformFunctorTraits { enum { smart_shift = 4 }; }; diff --git a/modules/gpu/src/cuda/ccomponetns.cu b/modules/gpu/src/cuda/ccomponetns.cu index 62e81376a..c094e08c0 100644 --- a/modules/gpu/src/cuda/ccomponetns.cu +++ b/modules/gpu/src/cuda/ccomponetns.cu @@ -497,6 +497,7 @@ namespace cv { namespace gpu { namespace device void labelComponents(const PtrStepSzb& edges, PtrStepSzi comps, int flags, cudaStream_t stream) { + (void) flags; dim3 block(CTA_SIZE_X, CTA_SIZE_Y); dim3 grid(divUp(edges.cols, TILE_COLS), divUp(edges.rows, TILE_ROWS)); @@ -529,4 +530,4 @@ namespace cv { namespace gpu { namespace device } } } } -#endif /* CUDA_DISABLER */ \ No newline at end of file +#endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/column_filter.h b/modules/gpu/src/cuda/column_filter.h index dbcd09fa3..52b910339 100644 --- a/modules/gpu/src/cuda/column_filter.h +++ b/modules/gpu/src/cuda/column_filter.h @@ -49,20 +49,12 @@ using namespace cv::gpu; using namespace cv::gpu::device; -namespace +namespace column_filter { #define MAX_KERNEL_SIZE 32 __constant__ float c_kernel[MAX_KERNEL_SIZE]; - void loadKernel(const float* kernel, int ksize, cudaStream_t stream) - { - if (stream == 0) - cudaSafeCall( cudaMemcpyToSymbol(c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) ); - else - cudaSafeCall( cudaMemcpyToSymbolAsync(c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) ); - } - template __global__ void linearColumnFilter(const PtrStepSz src, PtrStep dst, const int anchor, const B brd) { @@ -196,182 +188,185 @@ namespace filter { { 0, - ::caller< 1, T, D, BrdColReflect101>, - ::caller< 2, T, D, BrdColReflect101>, - ::caller< 3, T, D, BrdColReflect101>, - ::caller< 4, T, D, BrdColReflect101>, - ::caller< 5, T, D, BrdColReflect101>, - ::caller< 6, T, D, BrdColReflect101>, - ::caller< 7, T, D, BrdColReflect101>, - ::caller< 8, T, D, BrdColReflect101>, - ::caller< 9, T, D, BrdColReflect101>, - ::caller<10, T, D, BrdColReflect101>, - ::caller<11, T, D, BrdColReflect101>, - ::caller<12, T, D, BrdColReflect101>, - ::caller<13, T, D, BrdColReflect101>, - ::caller<14, T, D, BrdColReflect101>, - ::caller<15, T, D, BrdColReflect101>, - ::caller<16, T, D, BrdColReflect101>, - ::caller<17, T, D, BrdColReflect101>, - ::caller<18, T, D, BrdColReflect101>, - ::caller<19, T, D, BrdColReflect101>, - ::caller<20, T, D, BrdColReflect101>, - ::caller<21, T, D, BrdColReflect101>, - ::caller<22, T, D, BrdColReflect101>, - ::caller<23, T, D, BrdColReflect101>, - ::caller<24, T, D, BrdColReflect101>, - ::caller<25, T, D, BrdColReflect101>, - ::caller<26, T, D, BrdColReflect101>, - ::caller<27, T, D, BrdColReflect101>, - ::caller<28, T, D, BrdColReflect101>, - ::caller<29, T, D, BrdColReflect101>, - ::caller<30, T, D, BrdColReflect101>, - ::caller<31, T, D, BrdColReflect101>, - ::caller<32, T, D, BrdColReflect101> + column_filter::caller< 1, T, D, BrdColReflect101>, + column_filter::caller< 2, T, D, BrdColReflect101>, + column_filter::caller< 3, T, D, BrdColReflect101>, + column_filter::caller< 4, T, D, BrdColReflect101>, + column_filter::caller< 5, T, D, BrdColReflect101>, + column_filter::caller< 6, T, D, BrdColReflect101>, + column_filter::caller< 7, T, D, BrdColReflect101>, + column_filter::caller< 8, T, D, BrdColReflect101>, + column_filter::caller< 9, T, D, BrdColReflect101>, + column_filter::caller<10, T, D, BrdColReflect101>, + column_filter::caller<11, T, D, BrdColReflect101>, + column_filter::caller<12, T, D, BrdColReflect101>, + column_filter::caller<13, T, D, BrdColReflect101>, + column_filter::caller<14, T, D, BrdColReflect101>, + column_filter::caller<15, T, D, BrdColReflect101>, + column_filter::caller<16, T, D, BrdColReflect101>, + column_filter::caller<17, T, D, BrdColReflect101>, + column_filter::caller<18, T, D, BrdColReflect101>, + column_filter::caller<19, T, D, BrdColReflect101>, + column_filter::caller<20, T, D, BrdColReflect101>, + column_filter::caller<21, T, D, BrdColReflect101>, + column_filter::caller<22, T, D, BrdColReflect101>, + column_filter::caller<23, T, D, BrdColReflect101>, + column_filter::caller<24, T, D, BrdColReflect101>, + column_filter::caller<25, T, D, BrdColReflect101>, + column_filter::caller<26, T, D, BrdColReflect101>, + column_filter::caller<27, T, D, BrdColReflect101>, + column_filter::caller<28, T, D, BrdColReflect101>, + column_filter::caller<29, T, D, BrdColReflect101>, + column_filter::caller<30, T, D, BrdColReflect101>, + column_filter::caller<31, T, D, BrdColReflect101>, + column_filter::caller<32, T, D, BrdColReflect101> }, { 0, - ::caller< 1, T, D, BrdColReplicate>, - ::caller< 2, T, D, BrdColReplicate>, - ::caller< 3, T, D, BrdColReplicate>, - ::caller< 4, T, D, BrdColReplicate>, - ::caller< 5, T, D, BrdColReplicate>, - ::caller< 6, T, D, BrdColReplicate>, - ::caller< 7, T, D, BrdColReplicate>, - ::caller< 8, T, D, BrdColReplicate>, - ::caller< 9, T, D, BrdColReplicate>, - ::caller<10, T, D, BrdColReplicate>, - ::caller<11, T, D, BrdColReplicate>, - ::caller<12, T, D, BrdColReplicate>, - ::caller<13, T, D, BrdColReplicate>, - ::caller<14, T, D, BrdColReplicate>, - ::caller<15, T, D, BrdColReplicate>, - ::caller<16, T, D, BrdColReplicate>, - ::caller<17, T, D, BrdColReplicate>, - ::caller<18, T, D, BrdColReplicate>, - ::caller<19, T, D, BrdColReplicate>, - ::caller<20, T, D, BrdColReplicate>, - ::caller<21, T, D, BrdColReplicate>, - ::caller<22, T, D, BrdColReplicate>, - ::caller<23, T, D, BrdColReplicate>, - ::caller<24, T, D, BrdColReplicate>, - ::caller<25, T, D, BrdColReplicate>, - ::caller<26, T, D, BrdColReplicate>, - ::caller<27, T, D, BrdColReplicate>, - ::caller<28, T, D, BrdColReplicate>, - ::caller<29, T, D, BrdColReplicate>, - ::caller<30, T, D, BrdColReplicate>, - ::caller<31, T, D, BrdColReplicate>, - ::caller<32, T, D, BrdColReplicate> + column_filter::caller< 1, T, D, BrdColReplicate>, + column_filter::caller< 2, T, D, BrdColReplicate>, + column_filter::caller< 3, T, D, BrdColReplicate>, + column_filter::caller< 4, T, D, BrdColReplicate>, + column_filter::caller< 5, T, D, BrdColReplicate>, + column_filter::caller< 6, T, D, BrdColReplicate>, + column_filter::caller< 7, T, D, BrdColReplicate>, + column_filter::caller< 8, T, D, BrdColReplicate>, + column_filter::caller< 9, T, D, BrdColReplicate>, + column_filter::caller<10, T, D, BrdColReplicate>, + column_filter::caller<11, T, D, BrdColReplicate>, + column_filter::caller<12, T, D, BrdColReplicate>, + column_filter::caller<13, T, D, BrdColReplicate>, + column_filter::caller<14, T, D, BrdColReplicate>, + column_filter::caller<15, T, D, BrdColReplicate>, + column_filter::caller<16, T, D, BrdColReplicate>, + column_filter::caller<17, T, D, BrdColReplicate>, + column_filter::caller<18, T, D, BrdColReplicate>, + column_filter::caller<19, T, D, BrdColReplicate>, + column_filter::caller<20, T, D, BrdColReplicate>, + column_filter::caller<21, T, D, BrdColReplicate>, + column_filter::caller<22, T, D, BrdColReplicate>, + column_filter::caller<23, T, D, BrdColReplicate>, + column_filter::caller<24, T, D, BrdColReplicate>, + column_filter::caller<25, T, D, BrdColReplicate>, + column_filter::caller<26, T, D, BrdColReplicate>, + column_filter::caller<27, T, D, BrdColReplicate>, + column_filter::caller<28, T, D, BrdColReplicate>, + column_filter::caller<29, T, D, BrdColReplicate>, + column_filter::caller<30, T, D, BrdColReplicate>, + column_filter::caller<31, T, D, BrdColReplicate>, + column_filter::caller<32, T, D, BrdColReplicate> }, { 0, - ::caller< 1, T, D, BrdColConstant>, - ::caller< 2, T, D, BrdColConstant>, - ::caller< 3, T, D, BrdColConstant>, - ::caller< 4, T, D, BrdColConstant>, - ::caller< 5, T, D, BrdColConstant>, - ::caller< 6, T, D, BrdColConstant>, - ::caller< 7, T, D, BrdColConstant>, - ::caller< 8, T, D, BrdColConstant>, - ::caller< 9, T, D, BrdColConstant>, - ::caller<10, T, D, BrdColConstant>, - ::caller<11, T, D, BrdColConstant>, - ::caller<12, T, D, BrdColConstant>, - ::caller<13, T, D, BrdColConstant>, - ::caller<14, T, D, BrdColConstant>, - ::caller<15, T, D, BrdColConstant>, - ::caller<16, T, D, BrdColConstant>, - ::caller<17, T, D, BrdColConstant>, - ::caller<18, T, D, BrdColConstant>, - ::caller<19, T, D, BrdColConstant>, - ::caller<20, T, D, BrdColConstant>, - ::caller<21, T, D, BrdColConstant>, - ::caller<22, T, D, BrdColConstant>, - ::caller<23, T, D, BrdColConstant>, - ::caller<24, T, D, BrdColConstant>, - ::caller<25, T, D, BrdColConstant>, - ::caller<26, T, D, BrdColConstant>, - ::caller<27, T, D, BrdColConstant>, - ::caller<28, T, D, BrdColConstant>, - ::caller<29, T, D, BrdColConstant>, - ::caller<30, T, D, BrdColConstant>, - ::caller<31, T, D, BrdColConstant>, - ::caller<32, T, D, BrdColConstant> + column_filter::caller< 1, T, D, BrdColConstant>, + column_filter::caller< 2, T, D, BrdColConstant>, + column_filter::caller< 3, T, D, BrdColConstant>, + column_filter::caller< 4, T, D, BrdColConstant>, + column_filter::caller< 5, T, D, BrdColConstant>, + column_filter::caller< 6, T, D, BrdColConstant>, + column_filter::caller< 7, T, D, BrdColConstant>, + column_filter::caller< 8, T, D, BrdColConstant>, + column_filter::caller< 9, T, D, BrdColConstant>, + column_filter::caller<10, T, D, BrdColConstant>, + column_filter::caller<11, T, D, BrdColConstant>, + column_filter::caller<12, T, D, BrdColConstant>, + column_filter::caller<13, T, D, BrdColConstant>, + column_filter::caller<14, T, D, BrdColConstant>, + column_filter::caller<15, T, D, BrdColConstant>, + column_filter::caller<16, T, D, BrdColConstant>, + column_filter::caller<17, T, D, BrdColConstant>, + column_filter::caller<18, T, D, BrdColConstant>, + column_filter::caller<19, T, D, BrdColConstant>, + column_filter::caller<20, T, D, BrdColConstant>, + column_filter::caller<21, T, D, BrdColConstant>, + column_filter::caller<22, T, D, BrdColConstant>, + column_filter::caller<23, T, D, BrdColConstant>, + column_filter::caller<24, T, D, BrdColConstant>, + column_filter::caller<25, T, D, BrdColConstant>, + column_filter::caller<26, T, D, BrdColConstant>, + column_filter::caller<27, T, D, BrdColConstant>, + column_filter::caller<28, T, D, BrdColConstant>, + column_filter::caller<29, T, D, BrdColConstant>, + column_filter::caller<30, T, D, BrdColConstant>, + column_filter::caller<31, T, D, BrdColConstant>, + column_filter::caller<32, T, D, BrdColConstant> }, { 0, - ::caller< 1, T, D, BrdColReflect>, - ::caller< 2, T, D, BrdColReflect>, - ::caller< 3, T, D, BrdColReflect>, - ::caller< 4, T, D, BrdColReflect>, - ::caller< 5, T, D, BrdColReflect>, - ::caller< 6, T, D, BrdColReflect>, - ::caller< 7, T, D, BrdColReflect>, - ::caller< 8, T, D, BrdColReflect>, - ::caller< 9, T, D, BrdColReflect>, - ::caller<10, T, D, BrdColReflect>, - ::caller<11, T, D, BrdColReflect>, - ::caller<12, T, D, BrdColReflect>, - ::caller<13, T, D, BrdColReflect>, - ::caller<14, T, D, BrdColReflect>, - ::caller<15, T, D, BrdColReflect>, - ::caller<16, T, D, BrdColReflect>, - ::caller<17, T, D, BrdColReflect>, - ::caller<18, T, D, BrdColReflect>, - ::caller<19, T, D, BrdColReflect>, - ::caller<20, T, D, BrdColReflect>, - ::caller<21, T, D, BrdColReflect>, - ::caller<22, T, D, BrdColReflect>, - ::caller<23, T, D, BrdColReflect>, - ::caller<24, T, D, BrdColReflect>, - ::caller<25, T, D, BrdColReflect>, - ::caller<26, T, D, BrdColReflect>, - ::caller<27, T, D, BrdColReflect>, - ::caller<28, T, D, BrdColReflect>, - ::caller<29, T, D, BrdColReflect>, - ::caller<30, T, D, BrdColReflect>, - ::caller<31, T, D, BrdColReflect>, - ::caller<32, T, D, BrdColReflect> + column_filter::caller< 1, T, D, BrdColReflect>, + column_filter::caller< 2, T, D, BrdColReflect>, + column_filter::caller< 3, T, D, BrdColReflect>, + column_filter::caller< 4, T, D, BrdColReflect>, + column_filter::caller< 5, T, D, BrdColReflect>, + column_filter::caller< 6, T, D, BrdColReflect>, + column_filter::caller< 7, T, D, BrdColReflect>, + column_filter::caller< 8, T, D, BrdColReflect>, + column_filter::caller< 9, T, D, BrdColReflect>, + column_filter::caller<10, T, D, BrdColReflect>, + column_filter::caller<11, T, D, BrdColReflect>, + column_filter::caller<12, T, D, BrdColReflect>, + column_filter::caller<13, T, D, BrdColReflect>, + column_filter::caller<14, T, D, BrdColReflect>, + column_filter::caller<15, T, D, BrdColReflect>, + column_filter::caller<16, T, D, BrdColReflect>, + column_filter::caller<17, T, D, BrdColReflect>, + column_filter::caller<18, T, D, BrdColReflect>, + column_filter::caller<19, T, D, BrdColReflect>, + column_filter::caller<20, T, D, BrdColReflect>, + column_filter::caller<21, T, D, BrdColReflect>, + column_filter::caller<22, T, D, BrdColReflect>, + column_filter::caller<23, T, D, BrdColReflect>, + column_filter::caller<24, T, D, BrdColReflect>, + column_filter::caller<25, T, D, BrdColReflect>, + column_filter::caller<26, T, D, BrdColReflect>, + column_filter::caller<27, T, D, BrdColReflect>, + column_filter::caller<28, T, D, BrdColReflect>, + column_filter::caller<29, T, D, BrdColReflect>, + column_filter::caller<30, T, D, BrdColReflect>, + column_filter::caller<31, T, D, BrdColReflect>, + column_filter::caller<32, T, D, BrdColReflect> }, { 0, - ::caller< 1, T, D, BrdColWrap>, - ::caller< 2, T, D, BrdColWrap>, - ::caller< 3, T, D, BrdColWrap>, - ::caller< 4, T, D, BrdColWrap>, - ::caller< 5, T, D, BrdColWrap>, - ::caller< 6, T, D, BrdColWrap>, - ::caller< 7, T, D, BrdColWrap>, - ::caller< 8, T, D, BrdColWrap>, - ::caller< 9, T, D, BrdColWrap>, - ::caller<10, T, D, BrdColWrap>, - ::caller<11, T, D, BrdColWrap>, - ::caller<12, T, D, BrdColWrap>, - ::caller<13, T, D, BrdColWrap>, - ::caller<14, T, D, BrdColWrap>, - ::caller<15, T, D, BrdColWrap>, - ::caller<16, T, D, BrdColWrap>, - ::caller<17, T, D, BrdColWrap>, - ::caller<18, T, D, BrdColWrap>, - ::caller<19, T, D, BrdColWrap>, - ::caller<20, T, D, BrdColWrap>, - ::caller<21, T, D, BrdColWrap>, - ::caller<22, T, D, BrdColWrap>, - ::caller<23, T, D, BrdColWrap>, - ::caller<24, T, D, BrdColWrap>, - ::caller<25, T, D, BrdColWrap>, - ::caller<26, T, D, BrdColWrap>, - ::caller<27, T, D, BrdColWrap>, - ::caller<28, T, D, BrdColWrap>, - ::caller<29, T, D, BrdColWrap>, - ::caller<30, T, D, BrdColWrap>, - ::caller<31, T, D, BrdColWrap>, - ::caller<32, T, D, BrdColWrap> + column_filter::caller< 1, T, D, BrdColWrap>, + column_filter::caller< 2, T, D, BrdColWrap>, + column_filter::caller< 3, T, D, BrdColWrap>, + column_filter::caller< 4, T, D, BrdColWrap>, + column_filter::caller< 5, T, D, BrdColWrap>, + column_filter::caller< 6, T, D, BrdColWrap>, + column_filter::caller< 7, T, D, BrdColWrap>, + column_filter::caller< 8, T, D, BrdColWrap>, + column_filter::caller< 9, T, D, BrdColWrap>, + column_filter::caller<10, T, D, BrdColWrap>, + column_filter::caller<11, T, D, BrdColWrap>, + column_filter::caller<12, T, D, BrdColWrap>, + column_filter::caller<13, T, D, BrdColWrap>, + column_filter::caller<14, T, D, BrdColWrap>, + column_filter::caller<15, T, D, BrdColWrap>, + column_filter::caller<16, T, D, BrdColWrap>, + column_filter::caller<17, T, D, BrdColWrap>, + column_filter::caller<18, T, D, BrdColWrap>, + column_filter::caller<19, T, D, BrdColWrap>, + column_filter::caller<20, T, D, BrdColWrap>, + column_filter::caller<21, T, D, BrdColWrap>, + column_filter::caller<22, T, D, BrdColWrap>, + column_filter::caller<23, T, D, BrdColWrap>, + column_filter::caller<24, T, D, BrdColWrap>, + column_filter::caller<25, T, D, BrdColWrap>, + column_filter::caller<26, T, D, BrdColWrap>, + column_filter::caller<27, T, D, BrdColWrap>, + column_filter::caller<28, T, D, BrdColWrap>, + column_filter::caller<29, T, D, BrdColWrap>, + column_filter::caller<30, T, D, BrdColWrap>, + column_filter::caller<31, T, D, BrdColWrap>, + column_filter::caller<32, T, D, BrdColWrap> } }; - ::loadKernel(kernel, ksize, stream); + if (stream == 0) + cudaSafeCall( cudaMemcpyToSymbol(column_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) ); + else + cudaSafeCall( cudaMemcpyToSymbolAsync(column_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) ); callers[brd_type][ksize]((PtrStepSz)src, (PtrStepSz)dst, anchor, cc, stream); } diff --git a/modules/gpu/src/cuda/element_operations.cu b/modules/gpu/src/cuda/element_operations.cu index 071ad89ce..8faaca8bf 100644 --- a/modules/gpu/src/cuda/element_operations.cu +++ b/modules/gpu/src/cuda/element_operations.cu @@ -52,7 +52,7 @@ using namespace cv::gpu; using namespace cv::gpu::device; -namespace +namespace arithm { template struct ArithmFuncTraits { @@ -152,7 +152,7 @@ namespace ////////////////////////////////////////////////////////////////////////// // addMat -namespace +namespace arithm { template struct VAdd4; template <> struct VAdd4 : binary_function @@ -336,19 +336,19 @@ namespace namespace cv { namespace gpu { namespace device { - template struct TransformFunctorTraits< VAdd4 > : ArithmFuncTraits + template struct TransformFunctorTraits< arithm::VAdd4 > : arithm::ArithmFuncTraits { }; //////////////////////////////////// - template struct TransformFunctorTraits< VAdd2 > : ArithmFuncTraits + template struct TransformFunctorTraits< arithm::VAdd2 > : arithm::ArithmFuncTraits { }; //////////////////////////////////// - template struct TransformFunctorTraits< AddMat > : ArithmFuncTraits + template struct TransformFunctorTraits< arithm::AddMat > : arithm::ArithmFuncTraits { }; }}} @@ -446,7 +446,7 @@ namespace arithm ////////////////////////////////////////////////////////////////////////// // addScalar -namespace +namespace arithm { template struct AddScalar : unary_function { @@ -463,7 +463,7 @@ namespace namespace cv { namespace gpu { namespace device { - template struct TransformFunctorTraits< AddScalar > : ArithmFuncTraits + template struct TransformFunctorTraits< arithm::AddScalar > : arithm::ArithmFuncTraits { }; }}} @@ -541,7 +541,7 @@ namespace arithm ////////////////////////////////////////////////////////////////////////// // subMat -namespace +namespace arithm { template struct VSub4; template <> struct VSub4 : binary_function @@ -725,19 +725,19 @@ namespace namespace cv { namespace gpu { namespace device { - template struct TransformFunctorTraits< VSub4 > : ArithmFuncTraits + template struct TransformFunctorTraits< arithm::VSub4 > : arithm::ArithmFuncTraits { }; //////////////////////////////////// - template struct TransformFunctorTraits< VSub2 > : ArithmFuncTraits + template struct TransformFunctorTraits< arithm::VSub2 > : arithm::ArithmFuncTraits { }; //////////////////////////////////// - template struct TransformFunctorTraits< SubMat > : ArithmFuncTraits + template struct TransformFunctorTraits< arithm::SubMat > : arithm::ArithmFuncTraits { }; }}} @@ -908,7 +908,7 @@ namespace arithm ////////////////////////////////////////////////////////////////////////// // mulMat -namespace +namespace arithm { struct Mul_8uc4_32f : binary_function { @@ -966,15 +966,15 @@ namespace namespace cv { namespace gpu { namespace device { - template <> struct TransformFunctorTraits : ArithmFuncTraits + template <> struct TransformFunctorTraits : arithm::ArithmFuncTraits { }; - template struct TransformFunctorTraits< Mul > : ArithmFuncTraits + template struct TransformFunctorTraits< arithm::Mul > : arithm::ArithmFuncTraits { }; - template struct TransformFunctorTraits< MulScale > : ArithmFuncTraits + template struct TransformFunctorTraits< arithm::MulScale > : arithm::ArithmFuncTraits { }; }}} @@ -1066,7 +1066,7 @@ namespace arithm ////////////////////////////////////////////////////////////////////////// // mulScalar -namespace +namespace arithm { template struct MulScalar : unary_function { @@ -1083,7 +1083,7 @@ namespace namespace cv { namespace gpu { namespace device { - template struct TransformFunctorTraits< MulScalar > : ArithmFuncTraits + template struct TransformFunctorTraits< arithm::MulScalar > : arithm::ArithmFuncTraits { }; }}} @@ -1157,7 +1157,7 @@ namespace arithm ////////////////////////////////////////////////////////////////////////// // divMat -namespace +namespace arithm { struct Div_8uc4_32f : binary_function { @@ -1234,15 +1234,15 @@ namespace namespace cv { namespace gpu { namespace device { - template <> struct TransformFunctorTraits : ArithmFuncTraits + template <> struct TransformFunctorTraits : arithm::ArithmFuncTraits { }; - template struct TransformFunctorTraits< Div > : ArithmFuncTraits + template struct TransformFunctorTraits< arithm::Div > : arithm::ArithmFuncTraits { }; - template struct TransformFunctorTraits< DivScale > : ArithmFuncTraits + template struct TransformFunctorTraits< arithm::DivScale > : arithm::ArithmFuncTraits { }; }}} @@ -1403,7 +1403,7 @@ namespace arithm ////////////////////////////////////////////////////////////////////////// // divInv -namespace +namespace arithm { template struct DivInv : unary_function { @@ -1420,7 +1420,7 @@ namespace namespace cv { namespace gpu { namespace device { - template struct TransformFunctorTraits< DivInv > : ArithmFuncTraits + template struct TransformFunctorTraits< arithm::DivInv > : arithm::ArithmFuncTraits { }; }}} @@ -1494,7 +1494,7 @@ namespace arithm ////////////////////////////////////////////////////////////////////////// // absDiffMat -namespace +namespace arithm { template struct VAbsDiff4; template <> struct VAbsDiff4 : binary_function @@ -1611,19 +1611,19 @@ namespace namespace cv { namespace gpu { namespace device { - template struct TransformFunctorTraits< VAbsDiff4 > : ArithmFuncTraits + template struct TransformFunctorTraits< arithm::VAbsDiff4 > : arithm::ArithmFuncTraits { }; //////////////////////////////////// - template struct TransformFunctorTraits< VAbsDiff2 > : ArithmFuncTraits + template struct TransformFunctorTraits< arithm::VAbsDiff2 > : arithm::ArithmFuncTraits { }; //////////////////////////////////// - template struct TransformFunctorTraits< AbsDiffMat > : ArithmFuncTraits + template struct TransformFunctorTraits< arithm::AbsDiffMat > : arithm::ArithmFuncTraits { }; }}} @@ -1666,7 +1666,7 @@ namespace arithm ////////////////////////////////////////////////////////////////////////// // absDiffScalar -namespace +namespace arithm { template struct AbsDiffScalar : unary_function { @@ -1684,7 +1684,7 @@ namespace namespace cv { namespace gpu { namespace device { - template struct TransformFunctorTraits< AbsDiffScalar > : ArithmFuncTraits + template struct TransformFunctorTraits< arithm::AbsDiffScalar > : arithm::ArithmFuncTraits { }; }}} @@ -1713,7 +1713,7 @@ namespace arithm namespace cv { namespace gpu { namespace device { - template struct TransformFunctorTraits< abs_func > : ArithmFuncTraits + template struct TransformFunctorTraits< abs_func > : arithm::ArithmFuncTraits { }; }}} @@ -1738,7 +1738,7 @@ namespace arithm ////////////////////////////////////////////////////////////////////////// // sqrMat -namespace +namespace arithm { template struct Sqr : unary_function { @@ -1754,7 +1754,7 @@ namespace namespace cv { namespace gpu { namespace device { - template struct TransformFunctorTraits< Sqr > : ArithmFuncTraits + template struct TransformFunctorTraits< arithm::Sqr > : arithm::ArithmFuncTraits { }; }}} @@ -1781,7 +1781,7 @@ namespace arithm namespace cv { namespace gpu { namespace device { - template struct TransformFunctorTraits< sqrt_func > : ArithmFuncTraits + template struct TransformFunctorTraits< sqrt_func > : arithm::ArithmFuncTraits { }; }}} @@ -1808,7 +1808,7 @@ namespace arithm namespace cv { namespace gpu { namespace device { - template struct TransformFunctorTraits< log_func > : ArithmFuncTraits + template struct TransformFunctorTraits< log_func > : arithm::ArithmFuncTraits { }; }}} @@ -1833,7 +1833,7 @@ namespace arithm ////////////////////////////////////////////////////////////////////////// // expMat -namespace +namespace arithm { template struct Exp : unary_function { @@ -1850,7 +1850,7 @@ namespace namespace cv { namespace gpu { namespace device { - template struct TransformFunctorTraits< Exp > : ArithmFuncTraits + template struct TransformFunctorTraits< arithm::Exp > : arithm::ArithmFuncTraits { }; }}} @@ -1875,7 +1875,7 @@ namespace arithm ////////////////////////////////////////////////////////////////////////////////////// // cmpMat -namespace +namespace arithm { template struct Cmp : binary_function @@ -1890,7 +1890,7 @@ namespace namespace cv { namespace gpu { namespace device { - template struct TransformFunctorTraits< Cmp > : ArithmFuncTraits + template struct TransformFunctorTraits< arithm::Cmp > : arithm::ArithmFuncTraits { }; }}} @@ -1957,7 +1957,7 @@ namespace arithm ////////////////////////////////////////////////////////////////////////////////////// // cmpScalar -namespace +namespace arithm { #define TYPE_VEC(type, cn) typename TypeVec::vec_type @@ -2020,7 +2020,7 @@ namespace namespace cv { namespace gpu { namespace device { - template struct TransformFunctorTraits< CmpScalar > : ArithmFuncTraits + template struct TransformFunctorTraits< arithm::CmpScalar > : arithm::ArithmFuncTraits { }; }}} @@ -2179,19 +2179,19 @@ namespace arithm namespace cv { namespace gpu { namespace device { - template struct TransformFunctorTraits< bit_not > : ArithmFuncTraits + template struct TransformFunctorTraits< bit_not > : arithm::ArithmFuncTraits { }; - template struct TransformFunctorTraits< bit_and > : ArithmFuncTraits + template struct TransformFunctorTraits< bit_and > : arithm::ArithmFuncTraits { }; - template struct TransformFunctorTraits< bit_or > : ArithmFuncTraits + template struct TransformFunctorTraits< bit_or > : arithm::ArithmFuncTraits { }; - template struct TransformFunctorTraits< bit_xor > : ArithmFuncTraits + template struct TransformFunctorTraits< bit_xor > : arithm::ArithmFuncTraits { }; }}} @@ -2252,15 +2252,15 @@ namespace arithm namespace cv { namespace gpu { namespace device { - template struct TransformFunctorTraits< binder2nd< bit_and > > : ArithmFuncTraits + template struct TransformFunctorTraits< binder2nd< bit_and > > : arithm::ArithmFuncTraits { }; - template struct TransformFunctorTraits< binder2nd< bit_or > > : ArithmFuncTraits + template struct TransformFunctorTraits< binder2nd< bit_or > > : arithm::ArithmFuncTraits { }; - template struct TransformFunctorTraits< binder2nd< bit_xor > > : ArithmFuncTraits + template struct TransformFunctorTraits< binder2nd< bit_xor > > : arithm::ArithmFuncTraits { }; }}} @@ -2298,7 +2298,7 @@ namespace arithm ////////////////////////////////////////////////////////////////////////// // min -namespace +namespace arithm { template struct VMin4; template <> struct VMin4 : binary_function @@ -2389,23 +2389,23 @@ namespace namespace cv { namespace gpu { namespace device { - template struct TransformFunctorTraits< VMin4 > : ArithmFuncTraits + template struct TransformFunctorTraits< arithm::VMin4 > : arithm::ArithmFuncTraits { }; //////////////////////////////////// - template struct TransformFunctorTraits< VMin2 > : ArithmFuncTraits + template struct TransformFunctorTraits< arithm::VMin2 > : arithm::ArithmFuncTraits { }; //////////////////////////////////// - template struct TransformFunctorTraits< minimum > : ArithmFuncTraits + template struct TransformFunctorTraits< minimum > : arithm::ArithmFuncTraits { }; - template struct TransformFunctorTraits< binder2nd< minimum > > : ArithmFuncTraits + template struct TransformFunctorTraits< binder2nd< minimum > > : arithm::ArithmFuncTraits { }; }}} @@ -2458,7 +2458,7 @@ namespace arithm ////////////////////////////////////////////////////////////////////////// // max -namespace +namespace arithm { template struct VMax4; template <> struct VMax4 : binary_function @@ -2549,23 +2549,23 @@ namespace namespace cv { namespace gpu { namespace device { - template struct TransformFunctorTraits< VMax4 > : ArithmFuncTraits + template struct TransformFunctorTraits< arithm::VMax4 > : arithm::ArithmFuncTraits { }; //////////////////////////////////// - template struct TransformFunctorTraits< VMax2 > : ArithmFuncTraits + template struct TransformFunctorTraits< arithm::VMax2 > : arithm::ArithmFuncTraits { }; //////////////////////////////////// - template struct TransformFunctorTraits< maximum > : ArithmFuncTraits + template struct TransformFunctorTraits< maximum > : arithm::ArithmFuncTraits { }; - template struct TransformFunctorTraits< binder2nd< maximum > > : ArithmFuncTraits + template struct TransformFunctorTraits< binder2nd< maximum > > : arithm::ArithmFuncTraits { }; }}} @@ -2620,23 +2620,23 @@ namespace arithm namespace cv { namespace gpu { namespace device { - template struct TransformFunctorTraits< thresh_binary_func > : ArithmFuncTraits + template struct TransformFunctorTraits< thresh_binary_func > : arithm::ArithmFuncTraits { }; - template struct TransformFunctorTraits< thresh_binary_inv_func > : ArithmFuncTraits + template struct TransformFunctorTraits< thresh_binary_inv_func > : arithm::ArithmFuncTraits { }; - template struct TransformFunctorTraits< thresh_trunc_func > : ArithmFuncTraits + template struct TransformFunctorTraits< thresh_trunc_func > : arithm::ArithmFuncTraits { }; - template struct TransformFunctorTraits< thresh_to_zero_func > : ArithmFuncTraits + template struct TransformFunctorTraits< thresh_to_zero_func > : arithm::ArithmFuncTraits { }; - template struct TransformFunctorTraits< thresh_to_zero_inv_func > : ArithmFuncTraits + template struct TransformFunctorTraits< thresh_to_zero_inv_func > : arithm::ArithmFuncTraits { }; }}} @@ -2679,7 +2679,7 @@ namespace arithm ////////////////////////////////////////////////////////////////////////// // pow -namespace +namespace arithm { template::is_signed> struct PowOp : unary_function { @@ -2734,7 +2734,7 @@ namespace namespace cv { namespace gpu { namespace device { - template struct TransformFunctorTraits< PowOp > : ArithmFuncTraits + template struct TransformFunctorTraits< arithm::PowOp > : arithm::ArithmFuncTraits { }; }}} @@ -2759,7 +2759,7 @@ namespace arithm ////////////////////////////////////////////////////////////////////////// // addWeighted -namespace +namespace arithm { template struct UseDouble_ { @@ -2809,14 +2809,14 @@ namespace namespace cv { namespace gpu { namespace device { - template struct AddWeightedTraits : DefaultTransformFunctorTraits< AddWeighted > + template struct AddWeightedTraits : DefaultTransformFunctorTraits< arithm::AddWeighted > { }; - template struct AddWeightedTraits : ArithmFuncTraits + template struct AddWeightedTraits : arithm::ArithmFuncTraits { }; - template struct TransformFunctorTraits< AddWeighted > : AddWeightedTraits + template struct TransformFunctorTraits< arithm::AddWeighted > : AddWeightedTraits { }; }}} diff --git a/modules/gpu/src/cuda/gftt.cu b/modules/gpu/src/cuda/gftt.cu index 4c21bd911..cae217e39 100644 --- a/modules/gpu/src/cuda/gftt.cu +++ b/modules/gpu/src/cuda/gftt.cu @@ -47,6 +47,7 @@ #if !defined CUDA_DISABLER +#include #include #include "opencv2/gpu/device/common.hpp" @@ -148,4 +149,4 @@ namespace cv { namespace gpu { namespace device }}} -#endif /* CUDA_DISABLER */ \ No newline at end of file +#endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/global_motion.cu b/modules/gpu/src/cuda/global_motion.cu index c48274a41..2af56568d 100644 --- a/modules/gpu/src/cuda/global_motion.cu +++ b/modules/gpu/src/cuda/global_motion.cu @@ -43,12 +43,11 @@ #if !defined CUDA_DISABLER -#include "thrust/device_ptr.h" -#include "thrust/remove.h" -#include "thrust/functional.h" -#include "internal_shared.hpp" +#include +#include +#include -using namespace thrust; +#include "internal_shared.hpp" namespace cv { namespace gpu { namespace device { namespace globmotion { @@ -64,7 +63,7 @@ int compactPoints(int N, float *points0, float *points1, const uchar *mask) return thrust::remove_if(thrust::make_zip_iterator(thrust::make_tuple(dpoints0, dpoints1)), thrust::make_zip_iterator(thrust::make_tuple(dpoints0 + N, dpoints1 + N)), dmask, thrust::not1(thrust::identity())) - - make_zip_iterator(make_tuple(dpoints0, dpoints1)); + - thrust::make_zip_iterator(make_tuple(dpoints0, dpoints1)); } @@ -117,4 +116,4 @@ void calcWobbleSuppressionMaps( }}}} -#endif /* CUDA_DISABLER */ \ No newline at end of file +#endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/hist.cu b/modules/gpu/src/cuda/hist.cu index 2adc5d5b4..9dd14182b 100644 --- a/modules/gpu/src/cuda/hist.cu +++ b/modules/gpu/src/cuda/hist.cu @@ -51,9 +51,9 @@ using namespace cv::gpu; using namespace cv::gpu::device; -namespace +namespace hist { - __global__ void histogram256(const uchar* src, int cols, int rows, size_t step, int* hist) + __global__ void histogram256Kernel(const uchar* src, int cols, int rows, size_t step, int* hist) { __shared__ int shist[256]; @@ -94,16 +94,13 @@ namespace if (histVal > 0) ::atomicAdd(hist + tid, histVal); } -} -namespace hist -{ void histogram256(PtrStepSzb src, int* hist, cudaStream_t stream) { const dim3 block(32, 8); const dim3 grid(divUp(src.rows, block.y)); - ::histogram256<<>>(src.data, src.cols, src.rows, src.step, hist); + histogram256Kernel<<>>(src.data, src.cols, src.rows, src.step, hist); cudaSafeCall( cudaGetLastError() ); if (stream == 0) @@ -113,7 +110,7 @@ namespace hist ///////////////////////////////////////////////////////////////////////// -namespace +namespace hist { __constant__ int c_lut[256]; @@ -133,7 +130,7 @@ namespace namespace cv { namespace gpu { namespace device { - template <> struct TransformFunctorTraits : DefaultTransformFunctorTraits + template <> struct TransformFunctorTraits : DefaultTransformFunctorTraits { enum { smart_shift = 4 }; }; diff --git a/modules/gpu/src/cuda/hog.cu b/modules/gpu/src/cuda/hog.cu index 6a7e927d1..523e5bf64 100644 --- a/modules/gpu/src/cuda/hog.cu +++ b/modules/gpu/src/cuda/hog.cu @@ -244,15 +244,17 @@ namespace cv { namespace gpu { namespace device return smem[0]; #endif } + else + { + #if __CUDA_ARCH__ >= 300 + if (threadIdx.x == 0) + smem[0] = sum; + #endif - #if __CUDA_ARCH__ >= 300 - if (threadIdx.x == 0) - smem[0] = sum; - #endif + __syncthreads(); - __syncthreads(); - - return smem[0]; + return smem[0]; + } } diff --git a/modules/gpu/src/cuda/hough.cu b/modules/gpu/src/cuda/hough.cu index 4835e3568..c4dfbcb66 100644 --- a/modules/gpu/src/cuda/hough.cu +++ b/modules/gpu/src/cuda/hough.cu @@ -42,7 +42,9 @@ #if !defined CUDA_DISABLER +#include #include + #include "opencv2/gpu/device/common.hpp" #include "opencv2/gpu/device/emulation.hpp" #include "opencv2/gpu/device/vec_math.hpp" diff --git a/modules/gpu/src/cuda/matrix_reductions.cu b/modules/gpu/src/cuda/matrix_reductions.cu index 7a0e8d2fe..d34b38a25 100644 --- a/modules/gpu/src/cuda/matrix_reductions.cu +++ b/modules/gpu/src/cuda/matrix_reductions.cu @@ -55,7 +55,7 @@ using namespace cv::gpu; using namespace cv::gpu::device; -namespace +namespace detail { template struct Unroll; template <> struct Unroll<1> @@ -218,7 +218,7 @@ namespace sum { sum = tid < gridDim.x * gridDim.y ? result[tid] : VecTraits::all(0); - device::reduce(Unroll::template smem_tuple(smem), Unroll::tie(sum), tid, Unroll::op(plus())); + device::reduce(detail::Unroll::template smem_tuple(smem), detail::Unroll::tie(sum), tid, detail::Unroll::op(plus())); if (tid == 0) { @@ -254,7 +254,7 @@ namespace sum { sum = tid < gridDim.x * gridDim.y ? result[tid] : VecTraits::all(0); - device::reduce(Unroll::template smem_tuple(smem), Unroll::tie(sum), tid, Unroll::op(plus())); + device::reduce(detail::Unroll::template smem_tuple(smem), detail::Unroll::tie(sum), tid, detail::Unroll::op(plus())); if (tid == 0) { @@ -294,7 +294,7 @@ namespace sum } } - device::reduce(Unroll::template smem_tuple(smem), Unroll::tie(sum), tid, Unroll::op(plus())); + device::reduce(detail::Unroll::template smem_tuple(smem), detail::Unroll::tie(sum), tid, detail::Unroll::op(plus())); GlobalReduce::run(sum, result, tid, bid, smem); } @@ -918,13 +918,11 @@ namespace countNonZero __global__ void kernel(const PtrStepSz src, unsigned int* count, const int twidth, const int theight) { __shared__ unsigned int scount[BLOCK_SIZE]; - __shared__ bool is_last; const int x0 = blockIdx.x * blockDim.x * twidth + threadIdx.x; const int y0 = blockIdx.y * blockDim.y * theight + threadIdx.y; const int tid = threadIdx.y * blockDim.x + threadIdx.x; - const int bid = blockIdx.y * gridDim.x + blockIdx.x; unsigned int mycount = 0; @@ -946,6 +944,9 @@ namespace countNonZero if (tid == 0) ::atomicAdd(count, mycount); #else + __shared__ bool is_last; + const int bid = blockIdx.y * gridDim.x + blockIdx.x; + if (tid == 0) { count[bid] = mycount; @@ -1244,7 +1245,7 @@ namespace reduce for (int x = threadIdx.x; x < src.cols; x += BLOCK_SIZE) myVal = op(myVal, saturate_cast(srcRow[x])); - device::reduce(Unroll::template smem_tuple(smem), Unroll::tie(myVal), threadIdx.x, Unroll::op(op)); + device::reduce(detail::Unroll::template smem_tuple(smem), detail::Unroll::tie(myVal), threadIdx.x, detail::Unroll::op(op)); if (threadIdx.x == 0) dst[y] = saturate_cast(op.result(myVal, src.cols)); diff --git a/modules/gpu/src/cuda/optflowbm.cu b/modules/gpu/src/cuda/optflowbm.cu index e924170fc..7e4acd900 100644 --- a/modules/gpu/src/cuda/optflowbm.cu +++ b/modules/gpu/src/cuda/optflowbm.cu @@ -48,7 +48,7 @@ using namespace cv::gpu; using namespace cv::gpu::device; -namespace +namespace optflowbm { texture tex_prev(false, cudaFilterModePoint, cudaAddressModeClamp); texture tex_curr(false, cudaFilterModePoint, cudaAddressModeClamp); @@ -145,10 +145,7 @@ namespace velx(i, j) = static_cast(sumx) / countMin; vely(i, j) = static_cast(sumy) / countMin; } -} -namespace optflowbm -{ void calc(PtrStepSzb prev, PtrStepSzb curr, PtrStepSzf velx, PtrStepSzf vely, int2 blockSize, int2 shiftSize, bool usePrevious, int maxX, int maxY, int acceptLevel, int escapeLevel, const short2* ss, int ssCount, cudaStream_t stream) { diff --git a/modules/gpu/src/cuda/orb.cu b/modules/gpu/src/cuda/orb.cu index d66b3e9ec..95706dfa3 100644 --- a/modules/gpu/src/cuda/orb.cu +++ b/modules/gpu/src/cuda/orb.cu @@ -47,6 +47,7 @@ #if !defined CUDA_DISABLER +#include #include #include "opencv2/gpu/device/common.hpp" diff --git a/modules/gpu/src/cuda/pyrlk.cu b/modules/gpu/src/cuda/pyrlk.cu index c0f54bd33..8d746143c 100644 --- a/modules/gpu/src/cuda/pyrlk.cu +++ b/modules/gpu/src/cuda/pyrlk.cu @@ -57,7 +57,7 @@ using namespace cv::gpu; using namespace cv::gpu::device; -namespace +namespace pyrlk { __constant__ int c_winSize_x; __constant__ int c_winSize_y; @@ -123,7 +123,7 @@ namespace } template - __global__ void sparse(const float2* prevPts, float2* nextPts, uchar* status, float* err, const int level, const int rows, const int cols) + __global__ void sparseKernel(const float2* prevPts, float2* nextPts, uchar* status, float* err, const int level, const int rows, const int cols) { #if __CUDA_ARCH__ <= 110 const int BLOCK_SIZE = 128; @@ -321,9 +321,9 @@ namespace dim3 grid(ptcount); if (level == 0 && err) - sparse<<>>(prevPts, nextPts, status, err, level, rows, cols); + sparseKernel<<>>(prevPts, nextPts, status, err, level, rows, cols); else - sparse<<>>(prevPts, nextPts, status, err, level, rows, cols); + sparseKernel<<>>(prevPts, nextPts, status, err, level, rows, cols); cudaSafeCall( cudaGetLastError() ); @@ -332,7 +332,7 @@ namespace } template - __global__ void dense(PtrStepf u, PtrStepf v, const PtrStepf prevU, const PtrStepf prevV, PtrStepf err, const int rows, const int cols) + __global__ void denseKernel(PtrStepf u, PtrStepf v, const PtrStepf prevU, const PtrStepf prevV, PtrStepf err, const int rows, const int cols) { extern __shared__ int smem[]; @@ -476,10 +476,7 @@ namespace err(y, x) = static_cast(errval) / (c_winSize_x * c_winSize_y); } } -} -namespace pyrlk -{ void loadConstants(int2 winSize, int iters) { cudaSafeCall( cudaMemcpyToSymbol(c_winSize_x, &winSize.x, sizeof(int)) ); @@ -500,11 +497,11 @@ namespace pyrlk static const func_t funcs[5][5] = { - {::sparse_caller<1, 1, 1>, ::sparse_caller<1, 2, 1>, ::sparse_caller<1, 3, 1>, ::sparse_caller<1, 4, 1>, ::sparse_caller<1, 5, 1>}, - {::sparse_caller<1, 1, 2>, ::sparse_caller<1, 2, 2>, ::sparse_caller<1, 3, 2>, ::sparse_caller<1, 4, 2>, ::sparse_caller<1, 5, 2>}, - {::sparse_caller<1, 1, 3>, ::sparse_caller<1, 2, 3>, ::sparse_caller<1, 3, 3>, ::sparse_caller<1, 4, 3>, ::sparse_caller<1, 5, 3>}, - {::sparse_caller<1, 1, 4>, ::sparse_caller<1, 2, 4>, ::sparse_caller<1, 3, 4>, ::sparse_caller<1, 4, 4>, ::sparse_caller<1, 5, 4>}, - {::sparse_caller<1, 1, 5>, ::sparse_caller<1, 2, 5>, ::sparse_caller<1, 3, 5>, ::sparse_caller<1, 4, 5>, ::sparse_caller<1, 5, 5>} + {sparse_caller<1, 1, 1>, sparse_caller<1, 2, 1>, sparse_caller<1, 3, 1>, sparse_caller<1, 4, 1>, sparse_caller<1, 5, 1>}, + {sparse_caller<1, 1, 2>, sparse_caller<1, 2, 2>, sparse_caller<1, 3, 2>, sparse_caller<1, 4, 2>, sparse_caller<1, 5, 2>}, + {sparse_caller<1, 1, 3>, sparse_caller<1, 2, 3>, sparse_caller<1, 3, 3>, sparse_caller<1, 4, 3>, sparse_caller<1, 5, 3>}, + {sparse_caller<1, 1, 4>, sparse_caller<1, 2, 4>, sparse_caller<1, 3, 4>, sparse_caller<1, 4, 4>, sparse_caller<1, 5, 4>}, + {sparse_caller<1, 1, 5>, sparse_caller<1, 2, 5>, sparse_caller<1, 3, 5>, sparse_caller<1, 4, 5>, sparse_caller<1, 5, 5>} }; bindTexture(&tex_If, I); @@ -522,11 +519,11 @@ namespace pyrlk static const func_t funcs[5][5] = { - {::sparse_caller<4, 1, 1>, ::sparse_caller<4, 2, 1>, ::sparse_caller<4, 3, 1>, ::sparse_caller<4, 4, 1>, ::sparse_caller<4, 5, 1>}, - {::sparse_caller<4, 1, 2>, ::sparse_caller<4, 2, 2>, ::sparse_caller<4, 3, 2>, ::sparse_caller<4, 4, 2>, ::sparse_caller<4, 5, 2>}, - {::sparse_caller<4, 1, 3>, ::sparse_caller<4, 2, 3>, ::sparse_caller<4, 3, 3>, ::sparse_caller<4, 4, 3>, ::sparse_caller<4, 5, 3>}, - {::sparse_caller<4, 1, 4>, ::sparse_caller<4, 2, 4>, ::sparse_caller<4, 3, 4>, ::sparse_caller<4, 4, 4>, ::sparse_caller<4, 5, 4>}, - {::sparse_caller<4, 1, 5>, ::sparse_caller<4, 2, 5>, ::sparse_caller<4, 3, 5>, ::sparse_caller<4, 4, 5>, ::sparse_caller<4, 5, 5>} + {sparse_caller<4, 1, 1>, sparse_caller<4, 2, 1>, sparse_caller<4, 3, 1>, sparse_caller<4, 4, 1>, sparse_caller<4, 5, 1>}, + {sparse_caller<4, 1, 2>, sparse_caller<4, 2, 2>, sparse_caller<4, 3, 2>, sparse_caller<4, 4, 2>, sparse_caller<4, 5, 2>}, + {sparse_caller<4, 1, 3>, sparse_caller<4, 2, 3>, sparse_caller<4, 3, 3>, sparse_caller<4, 4, 3>, sparse_caller<4, 5, 3>}, + {sparse_caller<4, 1, 4>, sparse_caller<4, 2, 4>, sparse_caller<4, 3, 4>, sparse_caller<4, 4, 4>, sparse_caller<4, 5, 4>}, + {sparse_caller<4, 1, 5>, sparse_caller<4, 2, 5>, sparse_caller<4, 3, 5>, sparse_caller<4, 4, 5>, sparse_caller<4, 5, 5>} }; bindTexture(&tex_If4, I); @@ -551,12 +548,12 @@ namespace pyrlk if (err.data) { - ::dense<<>>(u, v, prevU, prevV, err, I.rows, I.cols); + denseKernel<<>>(u, v, prevU, prevV, err, I.rows, I.cols); cudaSafeCall( cudaGetLastError() ); } else { - ::dense<<>>(u, v, prevU, prevV, PtrStepf(), I.rows, I.cols); + denseKernel<<>>(u, v, prevU, prevV, PtrStepf(), I.rows, I.cols); cudaSafeCall( cudaGetLastError() ); } diff --git a/modules/gpu/src/cuda/row_filter.h b/modules/gpu/src/cuda/row_filter.h index 0da2dfe0c..f2da684cc 100644 --- a/modules/gpu/src/cuda/row_filter.h +++ b/modules/gpu/src/cuda/row_filter.h @@ -49,20 +49,12 @@ using namespace cv::gpu; using namespace cv::gpu::device; -namespace +namespace row_filter { #define MAX_KERNEL_SIZE 32 __constant__ float c_kernel[MAX_KERNEL_SIZE]; - void loadKernel(const float* kernel, int ksize, cudaStream_t stream) - { - if (stream == 0) - cudaSafeCall( cudaMemcpyToSymbol(c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) ); - else - cudaSafeCall( cudaMemcpyToSymbolAsync(c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) ); - } - template __global__ void linearRowFilter(const PtrStepSz src, PtrStep dst, const int anchor, const B brd) { @@ -195,182 +187,185 @@ namespace filter { { 0, - ::caller< 1, T, D, BrdRowReflect101>, - ::caller< 2, T, D, BrdRowReflect101>, - ::caller< 3, T, D, BrdRowReflect101>, - ::caller< 4, T, D, BrdRowReflect101>, - ::caller< 5, T, D, BrdRowReflect101>, - ::caller< 6, T, D, BrdRowReflect101>, - ::caller< 7, T, D, BrdRowReflect101>, - ::caller< 8, T, D, BrdRowReflect101>, - ::caller< 9, T, D, BrdRowReflect101>, - ::caller<10, T, D, BrdRowReflect101>, - ::caller<11, T, D, BrdRowReflect101>, - ::caller<12, T, D, BrdRowReflect101>, - ::caller<13, T, D, BrdRowReflect101>, - ::caller<14, T, D, BrdRowReflect101>, - ::caller<15, T, D, BrdRowReflect101>, - ::caller<16, T, D, BrdRowReflect101>, - ::caller<17, T, D, BrdRowReflect101>, - ::caller<18, T, D, BrdRowReflect101>, - ::caller<19, T, D, BrdRowReflect101>, - ::caller<20, T, D, BrdRowReflect101>, - ::caller<21, T, D, BrdRowReflect101>, - ::caller<22, T, D, BrdRowReflect101>, - ::caller<23, T, D, BrdRowReflect101>, - ::caller<24, T, D, BrdRowReflect101>, - ::caller<25, T, D, BrdRowReflect101>, - ::caller<26, T, D, BrdRowReflect101>, - ::caller<27, T, D, BrdRowReflect101>, - ::caller<28, T, D, BrdRowReflect101>, - ::caller<29, T, D, BrdRowReflect101>, - ::caller<30, T, D, BrdRowReflect101>, - ::caller<31, T, D, BrdRowReflect101>, - ::caller<32, T, D, BrdRowReflect101> + row_filter::caller< 1, T, D, BrdRowReflect101>, + row_filter::caller< 2, T, D, BrdRowReflect101>, + row_filter::caller< 3, T, D, BrdRowReflect101>, + row_filter::caller< 4, T, D, BrdRowReflect101>, + row_filter::caller< 5, T, D, BrdRowReflect101>, + row_filter::caller< 6, T, D, BrdRowReflect101>, + row_filter::caller< 7, T, D, BrdRowReflect101>, + row_filter::caller< 8, T, D, BrdRowReflect101>, + row_filter::caller< 9, T, D, BrdRowReflect101>, + row_filter::caller<10, T, D, BrdRowReflect101>, + row_filter::caller<11, T, D, BrdRowReflect101>, + row_filter::caller<12, T, D, BrdRowReflect101>, + row_filter::caller<13, T, D, BrdRowReflect101>, + row_filter::caller<14, T, D, BrdRowReflect101>, + row_filter::caller<15, T, D, BrdRowReflect101>, + row_filter::caller<16, T, D, BrdRowReflect101>, + row_filter::caller<17, T, D, BrdRowReflect101>, + row_filter::caller<18, T, D, BrdRowReflect101>, + row_filter::caller<19, T, D, BrdRowReflect101>, + row_filter::caller<20, T, D, BrdRowReflect101>, + row_filter::caller<21, T, D, BrdRowReflect101>, + row_filter::caller<22, T, D, BrdRowReflect101>, + row_filter::caller<23, T, D, BrdRowReflect101>, + row_filter::caller<24, T, D, BrdRowReflect101>, + row_filter::caller<25, T, D, BrdRowReflect101>, + row_filter::caller<26, T, D, BrdRowReflect101>, + row_filter::caller<27, T, D, BrdRowReflect101>, + row_filter::caller<28, T, D, BrdRowReflect101>, + row_filter::caller<29, T, D, BrdRowReflect101>, + row_filter::caller<30, T, D, BrdRowReflect101>, + row_filter::caller<31, T, D, BrdRowReflect101>, + row_filter::caller<32, T, D, BrdRowReflect101> }, { 0, - ::caller< 1, T, D, BrdRowReplicate>, - ::caller< 2, T, D, BrdRowReplicate>, - ::caller< 3, T, D, BrdRowReplicate>, - ::caller< 4, T, D, BrdRowReplicate>, - ::caller< 5, T, D, BrdRowReplicate>, - ::caller< 6, T, D, BrdRowReplicate>, - ::caller< 7, T, D, BrdRowReplicate>, - ::caller< 8, T, D, BrdRowReplicate>, - ::caller< 9, T, D, BrdRowReplicate>, - ::caller<10, T, D, BrdRowReplicate>, - ::caller<11, T, D, BrdRowReplicate>, - ::caller<12, T, D, BrdRowReplicate>, - ::caller<13, T, D, BrdRowReplicate>, - ::caller<14, T, D, BrdRowReplicate>, - ::caller<15, T, D, BrdRowReplicate>, - ::caller<16, T, D, BrdRowReplicate>, - ::caller<17, T, D, BrdRowReplicate>, - ::caller<18, T, D, BrdRowReplicate>, - ::caller<19, T, D, BrdRowReplicate>, - ::caller<20, T, D, BrdRowReplicate>, - ::caller<21, T, D, BrdRowReplicate>, - ::caller<22, T, D, BrdRowReplicate>, - ::caller<23, T, D, BrdRowReplicate>, - ::caller<24, T, D, BrdRowReplicate>, - ::caller<25, T, D, BrdRowReplicate>, - ::caller<26, T, D, BrdRowReplicate>, - ::caller<27, T, D, BrdRowReplicate>, - ::caller<28, T, D, BrdRowReplicate>, - ::caller<29, T, D, BrdRowReplicate>, - ::caller<30, T, D, BrdRowReplicate>, - ::caller<31, T, D, BrdRowReplicate>, - ::caller<32, T, D, BrdRowReplicate> + row_filter::caller< 1, T, D, BrdRowReplicate>, + row_filter::caller< 2, T, D, BrdRowReplicate>, + row_filter::caller< 3, T, D, BrdRowReplicate>, + row_filter::caller< 4, T, D, BrdRowReplicate>, + row_filter::caller< 5, T, D, BrdRowReplicate>, + row_filter::caller< 6, T, D, BrdRowReplicate>, + row_filter::caller< 7, T, D, BrdRowReplicate>, + row_filter::caller< 8, T, D, BrdRowReplicate>, + row_filter::caller< 9, T, D, BrdRowReplicate>, + row_filter::caller<10, T, D, BrdRowReplicate>, + row_filter::caller<11, T, D, BrdRowReplicate>, + row_filter::caller<12, T, D, BrdRowReplicate>, + row_filter::caller<13, T, D, BrdRowReplicate>, + row_filter::caller<14, T, D, BrdRowReplicate>, + row_filter::caller<15, T, D, BrdRowReplicate>, + row_filter::caller<16, T, D, BrdRowReplicate>, + row_filter::caller<17, T, D, BrdRowReplicate>, + row_filter::caller<18, T, D, BrdRowReplicate>, + row_filter::caller<19, T, D, BrdRowReplicate>, + row_filter::caller<20, T, D, BrdRowReplicate>, + row_filter::caller<21, T, D, BrdRowReplicate>, + row_filter::caller<22, T, D, BrdRowReplicate>, + row_filter::caller<23, T, D, BrdRowReplicate>, + row_filter::caller<24, T, D, BrdRowReplicate>, + row_filter::caller<25, T, D, BrdRowReplicate>, + row_filter::caller<26, T, D, BrdRowReplicate>, + row_filter::caller<27, T, D, BrdRowReplicate>, + row_filter::caller<28, T, D, BrdRowReplicate>, + row_filter::caller<29, T, D, BrdRowReplicate>, + row_filter::caller<30, T, D, BrdRowReplicate>, + row_filter::caller<31, T, D, BrdRowReplicate>, + row_filter::caller<32, T, D, BrdRowReplicate> }, { 0, - ::caller< 1, T, D, BrdRowConstant>, - ::caller< 2, T, D, BrdRowConstant>, - ::caller< 3, T, D, BrdRowConstant>, - ::caller< 4, T, D, BrdRowConstant>, - ::caller< 5, T, D, BrdRowConstant>, - ::caller< 6, T, D, BrdRowConstant>, - ::caller< 7, T, D, BrdRowConstant>, - ::caller< 8, T, D, BrdRowConstant>, - ::caller< 9, T, D, BrdRowConstant>, - ::caller<10, T, D, BrdRowConstant>, - ::caller<11, T, D, BrdRowConstant>, - ::caller<12, T, D, BrdRowConstant>, - ::caller<13, T, D, BrdRowConstant>, - ::caller<14, T, D, BrdRowConstant>, - ::caller<15, T, D, BrdRowConstant>, - ::caller<16, T, D, BrdRowConstant>, - ::caller<17, T, D, BrdRowConstant>, - ::caller<18, T, D, BrdRowConstant>, - ::caller<19, T, D, BrdRowConstant>, - ::caller<20, T, D, BrdRowConstant>, - ::caller<21, T, D, BrdRowConstant>, - ::caller<22, T, D, BrdRowConstant>, - ::caller<23, T, D, BrdRowConstant>, - ::caller<24, T, D, BrdRowConstant>, - ::caller<25, T, D, BrdRowConstant>, - ::caller<26, T, D, BrdRowConstant>, - ::caller<27, T, D, BrdRowConstant>, - ::caller<28, T, D, BrdRowConstant>, - ::caller<29, T, D, BrdRowConstant>, - ::caller<30, T, D, BrdRowConstant>, - ::caller<31, T, D, BrdRowConstant>, - ::caller<32, T, D, BrdRowConstant> + row_filter::caller< 1, T, D, BrdRowConstant>, + row_filter::caller< 2, T, D, BrdRowConstant>, + row_filter::caller< 3, T, D, BrdRowConstant>, + row_filter::caller< 4, T, D, BrdRowConstant>, + row_filter::caller< 5, T, D, BrdRowConstant>, + row_filter::caller< 6, T, D, BrdRowConstant>, + row_filter::caller< 7, T, D, BrdRowConstant>, + row_filter::caller< 8, T, D, BrdRowConstant>, + row_filter::caller< 9, T, D, BrdRowConstant>, + row_filter::caller<10, T, D, BrdRowConstant>, + row_filter::caller<11, T, D, BrdRowConstant>, + row_filter::caller<12, T, D, BrdRowConstant>, + row_filter::caller<13, T, D, BrdRowConstant>, + row_filter::caller<14, T, D, BrdRowConstant>, + row_filter::caller<15, T, D, BrdRowConstant>, + row_filter::caller<16, T, D, BrdRowConstant>, + row_filter::caller<17, T, D, BrdRowConstant>, + row_filter::caller<18, T, D, BrdRowConstant>, + row_filter::caller<19, T, D, BrdRowConstant>, + row_filter::caller<20, T, D, BrdRowConstant>, + row_filter::caller<21, T, D, BrdRowConstant>, + row_filter::caller<22, T, D, BrdRowConstant>, + row_filter::caller<23, T, D, BrdRowConstant>, + row_filter::caller<24, T, D, BrdRowConstant>, + row_filter::caller<25, T, D, BrdRowConstant>, + row_filter::caller<26, T, D, BrdRowConstant>, + row_filter::caller<27, T, D, BrdRowConstant>, + row_filter::caller<28, T, D, BrdRowConstant>, + row_filter::caller<29, T, D, BrdRowConstant>, + row_filter::caller<30, T, D, BrdRowConstant>, + row_filter::caller<31, T, D, BrdRowConstant>, + row_filter::caller<32, T, D, BrdRowConstant> }, { 0, - ::caller< 1, T, D, BrdRowReflect>, - ::caller< 2, T, D, BrdRowReflect>, - ::caller< 3, T, D, BrdRowReflect>, - ::caller< 4, T, D, BrdRowReflect>, - ::caller< 5, T, D, BrdRowReflect>, - ::caller< 6, T, D, BrdRowReflect>, - ::caller< 7, T, D, BrdRowReflect>, - ::caller< 8, T, D, BrdRowReflect>, - ::caller< 9, T, D, BrdRowReflect>, - ::caller<10, T, D, BrdRowReflect>, - ::caller<11, T, D, BrdRowReflect>, - ::caller<12, T, D, BrdRowReflect>, - ::caller<13, T, D, BrdRowReflect>, - ::caller<14, T, D, BrdRowReflect>, - ::caller<15, T, D, BrdRowReflect>, - ::caller<16, T, D, BrdRowReflect>, - ::caller<17, T, D, BrdRowReflect>, - ::caller<18, T, D, BrdRowReflect>, - ::caller<19, T, D, BrdRowReflect>, - ::caller<20, T, D, BrdRowReflect>, - ::caller<21, T, D, BrdRowReflect>, - ::caller<22, T, D, BrdRowReflect>, - ::caller<23, T, D, BrdRowReflect>, - ::caller<24, T, D, BrdRowReflect>, - ::caller<25, T, D, BrdRowReflect>, - ::caller<26, T, D, BrdRowReflect>, - ::caller<27, T, D, BrdRowReflect>, - ::caller<28, T, D, BrdRowReflect>, - ::caller<29, T, D, BrdRowReflect>, - ::caller<30, T, D, BrdRowReflect>, - ::caller<31, T, D, BrdRowReflect>, - ::caller<32, T, D, BrdRowReflect> + row_filter::caller< 1, T, D, BrdRowReflect>, + row_filter::caller< 2, T, D, BrdRowReflect>, + row_filter::caller< 3, T, D, BrdRowReflect>, + row_filter::caller< 4, T, D, BrdRowReflect>, + row_filter::caller< 5, T, D, BrdRowReflect>, + row_filter::caller< 6, T, D, BrdRowReflect>, + row_filter::caller< 7, T, D, BrdRowReflect>, + row_filter::caller< 8, T, D, BrdRowReflect>, + row_filter::caller< 9, T, D, BrdRowReflect>, + row_filter::caller<10, T, D, BrdRowReflect>, + row_filter::caller<11, T, D, BrdRowReflect>, + row_filter::caller<12, T, D, BrdRowReflect>, + row_filter::caller<13, T, D, BrdRowReflect>, + row_filter::caller<14, T, D, BrdRowReflect>, + row_filter::caller<15, T, D, BrdRowReflect>, + row_filter::caller<16, T, D, BrdRowReflect>, + row_filter::caller<17, T, D, BrdRowReflect>, + row_filter::caller<18, T, D, BrdRowReflect>, + row_filter::caller<19, T, D, BrdRowReflect>, + row_filter::caller<20, T, D, BrdRowReflect>, + row_filter::caller<21, T, D, BrdRowReflect>, + row_filter::caller<22, T, D, BrdRowReflect>, + row_filter::caller<23, T, D, BrdRowReflect>, + row_filter::caller<24, T, D, BrdRowReflect>, + row_filter::caller<25, T, D, BrdRowReflect>, + row_filter::caller<26, T, D, BrdRowReflect>, + row_filter::caller<27, T, D, BrdRowReflect>, + row_filter::caller<28, T, D, BrdRowReflect>, + row_filter::caller<29, T, D, BrdRowReflect>, + row_filter::caller<30, T, D, BrdRowReflect>, + row_filter::caller<31, T, D, BrdRowReflect>, + row_filter::caller<32, T, D, BrdRowReflect> }, { 0, - ::caller< 1, T, D, BrdRowWrap>, - ::caller< 2, T, D, BrdRowWrap>, - ::caller< 3, T, D, BrdRowWrap>, - ::caller< 4, T, D, BrdRowWrap>, - ::caller< 5, T, D, BrdRowWrap>, - ::caller< 6, T, D, BrdRowWrap>, - ::caller< 7, T, D, BrdRowWrap>, - ::caller< 8, T, D, BrdRowWrap>, - ::caller< 9, T, D, BrdRowWrap>, - ::caller<10, T, D, BrdRowWrap>, - ::caller<11, T, D, BrdRowWrap>, - ::caller<12, T, D, BrdRowWrap>, - ::caller<13, T, D, BrdRowWrap>, - ::caller<14, T, D, BrdRowWrap>, - ::caller<15, T, D, BrdRowWrap>, - ::caller<16, T, D, BrdRowWrap>, - ::caller<17, T, D, BrdRowWrap>, - ::caller<18, T, D, BrdRowWrap>, - ::caller<19, T, D, BrdRowWrap>, - ::caller<20, T, D, BrdRowWrap>, - ::caller<21, T, D, BrdRowWrap>, - ::caller<22, T, D, BrdRowWrap>, - ::caller<23, T, D, BrdRowWrap>, - ::caller<24, T, D, BrdRowWrap>, - ::caller<25, T, D, BrdRowWrap>, - ::caller<26, T, D, BrdRowWrap>, - ::caller<27, T, D, BrdRowWrap>, - ::caller<28, T, D, BrdRowWrap>, - ::caller<29, T, D, BrdRowWrap>, - ::caller<30, T, D, BrdRowWrap>, - ::caller<31, T, D, BrdRowWrap>, - ::caller<32, T, D, BrdRowWrap> + row_filter::caller< 1, T, D, BrdRowWrap>, + row_filter::caller< 2, T, D, BrdRowWrap>, + row_filter::caller< 3, T, D, BrdRowWrap>, + row_filter::caller< 4, T, D, BrdRowWrap>, + row_filter::caller< 5, T, D, BrdRowWrap>, + row_filter::caller< 6, T, D, BrdRowWrap>, + row_filter::caller< 7, T, D, BrdRowWrap>, + row_filter::caller< 8, T, D, BrdRowWrap>, + row_filter::caller< 9, T, D, BrdRowWrap>, + row_filter::caller<10, T, D, BrdRowWrap>, + row_filter::caller<11, T, D, BrdRowWrap>, + row_filter::caller<12, T, D, BrdRowWrap>, + row_filter::caller<13, T, D, BrdRowWrap>, + row_filter::caller<14, T, D, BrdRowWrap>, + row_filter::caller<15, T, D, BrdRowWrap>, + row_filter::caller<16, T, D, BrdRowWrap>, + row_filter::caller<17, T, D, BrdRowWrap>, + row_filter::caller<18, T, D, BrdRowWrap>, + row_filter::caller<19, T, D, BrdRowWrap>, + row_filter::caller<20, T, D, BrdRowWrap>, + row_filter::caller<21, T, D, BrdRowWrap>, + row_filter::caller<22, T, D, BrdRowWrap>, + row_filter::caller<23, T, D, BrdRowWrap>, + row_filter::caller<24, T, D, BrdRowWrap>, + row_filter::caller<25, T, D, BrdRowWrap>, + row_filter::caller<26, T, D, BrdRowWrap>, + row_filter::caller<27, T, D, BrdRowWrap>, + row_filter::caller<28, T, D, BrdRowWrap>, + row_filter::caller<29, T, D, BrdRowWrap>, + row_filter::caller<30, T, D, BrdRowWrap>, + row_filter::caller<31, T, D, BrdRowWrap>, + row_filter::caller<32, T, D, BrdRowWrap> } }; - loadKernel(kernel, ksize, stream); + if (stream == 0) + cudaSafeCall( cudaMemcpyToSymbol(row_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) ); + else + cudaSafeCall( cudaMemcpyToSymbolAsync(row_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) ); callers[brd_type][ksize]((PtrStepSz)src, (PtrStepSz)dst, anchor, cc, stream); } diff --git a/modules/gpu/src/cuda/split_merge.cu b/modules/gpu/src/cuda/split_merge.cu index a62c76bbe..834b283f0 100644 --- a/modules/gpu/src/cuda/split_merge.cu +++ b/modules/gpu/src/cuda/split_merge.cu @@ -508,4 +508,4 @@ namespace cv { namespace gpu { namespace device }}} // namespace cv { namespace gpu { namespace device -#endif /* CUDA_DISABLER */ \ No newline at end of file +#endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/tvl1flow.cu b/modules/gpu/src/cuda/tvl1flow.cu index dc07d2f2e..27694ad26 100644 --- a/modules/gpu/src/cuda/tvl1flow.cu +++ b/modules/gpu/src/cuda/tvl1flow.cu @@ -52,9 +52,9 @@ using namespace cv::gpu::device; //////////////////////////////////////////////////////////// // centeredGradient -namespace +namespace tvl1flow { - __global__ void centeredGradient(const PtrStepSzf src, PtrStepf dx, PtrStepf dy) + __global__ void centeredGradientKernel(const PtrStepSzf src, PtrStepf dx, PtrStepf dy) { const int x = blockIdx.x * blockDim.x + threadIdx.x; const int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -65,16 +65,13 @@ namespace dx(y, x) = 0.5f * (src(y, ::min(x + 1, src.cols - 1)) - src(y, ::max(x - 1, 0))); dy(y, x) = 0.5f * (src(::min(y + 1, src.rows - 1), x) - src(::max(y - 1, 0), x)); } -} -namespace tvl1flow -{ void centeredGradient(PtrStepSzf src, PtrStepSzf dx, PtrStepSzf dy) { const dim3 block(32, 8); const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y)); - ::centeredGradient<<>>(src, dx, dy); + centeredGradientKernel<<>>(src, dx, dy); cudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaDeviceSynchronize() ); @@ -84,7 +81,7 @@ namespace tvl1flow //////////////////////////////////////////////////////////// // warpBackward -namespace +namespace tvl1flow { static __device__ __forceinline__ float bicubicCoeff(float x_) { @@ -107,7 +104,7 @@ namespace texture tex_I1x(false, cudaFilterModePoint, cudaAddressModeClamp); texture tex_I1y(false, cudaFilterModePoint, cudaAddressModeClamp); - __global__ void warpBackward(const PtrStepSzf I0, const PtrStepf u1, const PtrStepf u2, PtrStepf I1w, PtrStepf I1wx, PtrStepf I1wy, PtrStepf grad, PtrStepf rho) + __global__ void warpBackwardKernel(const PtrStepSzf I0, const PtrStepf u1, const PtrStepf u2, PtrStepf I1w, PtrStepf I1wx, PtrStepf I1wy, PtrStepf grad, PtrStepf rho) { const int x = blockIdx.x * blockDim.x + threadIdx.x; const int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -166,10 +163,7 @@ namespace const float I0Val = I0(y, x); rho(y, x) = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val; } -} -namespace tvl1flow -{ void warpBackward(PtrStepSzf I0, PtrStepSzf I1, PtrStepSzf I1x, PtrStepSzf I1y, PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf I1w, PtrStepSzf I1wx, PtrStepSzf I1wy, PtrStepSzf grad, PtrStepSzf rho) { const dim3 block(32, 8); @@ -179,7 +173,7 @@ namespace tvl1flow bindTexture(&tex_I1x, I1x); bindTexture(&tex_I1y, I1y); - ::warpBackward<<>>(I0, u1, u2, I1w, I1wx, I1wy, grad, rho); + warpBackwardKernel<<>>(I0, u1, u2, I1w, I1wx, I1wy, grad, rho); cudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaDeviceSynchronize() ); @@ -189,7 +183,7 @@ namespace tvl1flow //////////////////////////////////////////////////////////// // estimateU -namespace +namespace tvl1flow { __device__ float divergence(const PtrStepf& v1, const PtrStepf& v2, int y, int x) { @@ -213,7 +207,7 @@ namespace } } - __global__ void estimateU(const PtrStepSzf I1wx, const PtrStepf I1wy, + __global__ void estimateUKernel(const PtrStepSzf I1wx, const PtrStepf I1wy, const PtrStepf grad, const PtrStepf rho_c, const PtrStepf p11, const PtrStepf p12, const PtrStepf p21, const PtrStepf p22, PtrStepf u1, PtrStepf u2, PtrStepf error, @@ -275,10 +269,7 @@ namespace const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal); error(y, x) = n1 + n2; } -} -namespace tvl1flow -{ void estimateU(PtrStepSzf I1wx, PtrStepSzf I1wy, PtrStepSzf grad, PtrStepSzf rho_c, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, @@ -288,7 +279,7 @@ namespace tvl1flow const dim3 block(32, 8); const dim3 grid(divUp(I1wx.cols, block.x), divUp(I1wx.rows, block.y)); - ::estimateU<<>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, error, l_t, theta); + estimateUKernel<<>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, error, l_t, theta); cudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaDeviceSynchronize() ); @@ -298,9 +289,9 @@ namespace tvl1flow //////////////////////////////////////////////////////////// // estimateDualVariables -namespace +namespace tvl1flow { - __global__ void estimateDualVariables(const PtrStepSzf u1, const PtrStepf u2, PtrStepf p11, PtrStepf p12, PtrStepf p21, PtrStepf p22, const float taut) + __global__ void estimateDualVariablesKernel(const PtrStepSzf u1, const PtrStepf u2, PtrStepf p11, PtrStepf p12, PtrStepf p21, PtrStepf p22, const float taut) { const int x = blockIdx.x * blockDim.x + threadIdx.x; const int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -325,16 +316,13 @@ namespace p21(y, x) = (p21(y, x) + taut * u2x) / ng2; p22(y, x) = (p22(y, x) + taut * u2y) / ng2; } -} -namespace tvl1flow -{ void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, float taut) { const dim3 block(32, 8); const dim3 grid(divUp(u1.cols, block.x), divUp(u1.rows, block.y)); - ::estimateDualVariables<<>>(u1, u2, p11, p12, p21, p22, taut); + estimateDualVariablesKernel<<>>(u1, u2, p11, p12, p21, p22, taut); cudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaDeviceSynchronize() ); diff --git a/modules/gpu/src/imgproc.cpp b/modules/gpu/src/imgproc.cpp index edafd5758..d430eceae 100644 --- a/modules/gpu/src/imgproc.cpp +++ b/modules/gpu/src/imgproc.cpp @@ -551,7 +551,7 @@ void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, S src.locateROI(whole, offset); if (info.supports(WARP_SHUFFLE_FUNCTIONS) && src.cols <= 2048 - && offset.x % 16 == 0 && ((src.cols + 63) / 64) * 64 <= (src.step - offset.x)) + && offset.x % 16 == 0 && ((src.cols + 63) / 64) * 64 <= (static_cast(src.step) - offset.x)) { ensureSizeIsEnough(((src.rows + 7) / 8) * 8, ((src.cols + 63) / 64) * 64, CV_32SC1, buffer); diff --git a/modules/gpu/test/nvidia/TestHaarCascadeApplication.cpp b/modules/gpu/test/nvidia/TestHaarCascadeApplication.cpp index 99913b885..dd410cd51 100644 --- a/modules/gpu/test/nvidia/TestHaarCascadeApplication.cpp +++ b/modules/gpu/test/nvidia/TestHaarCascadeApplication.cpp @@ -210,6 +210,18 @@ bool TestHaarCascadeApplication::process() #if defined(__GNUC__) //http://www.christian-seiler.de/projekte/fpmath/ + #ifndef _FPU_EXTENDED + #define _FPU_EXTENDED 0 + #endif + + #ifndef _FPU_DOUBLE + #define _FPU_DOUBLE 0 + #endif + + #ifndef _FPU_SINGLE + #define _FPU_SINGLE 0 + #endif + fpu_control_t fpu_oldcw, fpu_cw; _FPU_GETCW(fpu_oldcw); // store old cw fpu_cw = (fpu_oldcw & ~_FPU_EXTENDED & ~_FPU_DOUBLE & ~_FPU_SINGLE) | _FPU_SINGLE; @@ -302,4 +314,4 @@ bool TestHaarCascadeApplication::deinit() return true; } -#endif /* CUDA_DISABLER */ \ No newline at end of file +#endif /* CUDA_DISABLER */ diff --git a/samples/gpu/driver_api_multi.cpp b/samples/gpu/driver_api_multi.cpp index 560908ce6..2d743f0e9 100644 --- a/samples/gpu/driver_api_multi.cpp +++ b/samples/gpu/driver_api_multi.cpp @@ -54,14 +54,8 @@ inline void safeCall_(int code, const char* expr, const char* file, int line) // Each GPU is associated with its own context CUcontext contexts[2]; -int main(int argc, char **argv) +int main() { - if (argc > 1) - { - cout << "CUDA driver API sample\n"; - return -1; - } - int num_devices = getCudaEnabledDeviceCount(); if (num_devices < 2) { diff --git a/samples/gpu/driver_api_stereo_multi.cpp b/samples/gpu/driver_api_stereo_multi.cpp index 3c663a5e5..b8f99e810 100644 --- a/samples/gpu/driver_api_stereo_multi.cpp +++ b/samples/gpu/driver_api_stereo_multi.cpp @@ -76,7 +76,7 @@ GpuMat d_result[2]; // CPU result Mat result; -void printHelp() +static void printHelp() { std::cout << "Usage: driver_api_stereo_multi_gpu --left --right \n"; } diff --git a/samples/gpu/softcascade.cpp b/samples/gpu/softcascade.cpp index 3c08fdb1c..fe518504b 100644 --- a/samples/gpu/softcascade.cpp +++ b/samples/gpu/softcascade.cpp @@ -76,8 +76,7 @@ int main(int argc, char** argv) cv::gpu::GpuMat dframe(frame), roi(frame.rows, frame.cols, CV_8UC1), trois; roi.setTo(cv::Scalar::all(1)); - cascade.genRoi(roi, trois); - cascade.detect(dframe, trois, objects); + cascade.detect(dframe, roi, objects); cv::Mat dt(objects); typedef cv::gpu::SCascade::Detection Detection; @@ -103,4 +102,4 @@ int main(int argc, char** argv) } return 0; -} \ No newline at end of file +}