diff --git a/modules/gpulegacy/CMakeLists.txt b/modules/gpulegacy/CMakeLists.txt
index 6dd61bd5b..9aa9b3b3e 100644
--- a/modules/gpulegacy/CMakeLists.txt
+++ b/modules/gpulegacy/CMakeLists.txt
@@ -4,6 +4,6 @@ endif()
 
 set(the_description "GPU-accelerated Computer Vision (legacy)")
 
-ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations -Wuninitialized)
+ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4130 /wd4324 /wd4512 -Wundef -Wmissing-declarations -Wuninitialized)
 
 ocv_define_module(gpulegacy opencv_core OPTIONAL opencv_objdetect)
diff --git a/modules/gpulegacy/include/opencv2/gpulegacy/NCVPyramid.hpp b/modules/gpulegacy/include/opencv2/gpulegacy/NCVPyramid.hpp
index 8fda836fe..88e2296be 100644
--- a/modules/gpulegacy/include/opencv2/gpulegacy/NCVPyramid.hpp
+++ b/modules/gpulegacy/include/opencv2/gpulegacy/NCVPyramid.hpp
@@ -52,8 +52,8 @@
 
 namespace cv { namespace gpu { namespace cudev
 {
     namespace pyramid
     {
-        template <class T> void kernelDownsampleX2_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-        template <class T> void kernelInterpolateFrom1_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+        CV_EXPORTS void downsampleX2(PtrStepSzb src, PtrStepSzb dst, int depth, int cn, cudaStream_t stream);
+        CV_EXPORTS void interpolateFrom1(PtrStepSzb src, PtrStepSzb dst, int depth, int cn, cudaStream_t stream);
     }
 }}}
diff --git a/modules/gpulegacy/src/cuda/NCVHaarObjectDetection.cu b/modules/gpulegacy/src/cuda/NCVHaarObjectDetection.cu
index da34ba731..c8aaaeace 100644
--- a/modules/gpulegacy/src/cuda/NCVHaarObjectDetection.cu
+++ b/modules/gpulegacy/src/cuda/NCVHaarObjectDetection.cu
@@ -66,6 +66,7 @@
 
 #ifdef HAVE_OPENCV_OBJDETECT
 #  include "opencv2/objdetect.hpp"
+#  include "opencv2/objdetect/objdetect_c.h"
 #endif
 
 #include "opencv2/gpulegacy/NCV.hpp"
@@ -2130,7 +2131,7 @@ static NCVStatus loadFromXML(const cv::String &filename,
     haar.ClassifierSize.height = 0;
     haar.bHasStumpsOnly = true;
     haar.bNeedsTiltedII = false;
-    Ncv32u curMaxTreeDepth;
+    Ncv32u curMaxTreeDepth = 0;
 
     std::vector<HaarClassifierNode128> h_TmpClassifierNotRootNodes;
     haarStages.resize(0);
diff --git a/modules/gpulegacy/src/cuda/NCVPyramid.cu b/modules/gpulegacy/src/cuda/NCVPyramid.cu
index acc4441b1..d42b46bcb 100644
--- a/modules/gpulegacy/src/cuda/NCVPyramid.cu
+++ b/modules/gpulegacy/src/cuda/NCVPyramid.cu
@@ -223,17 +223,25 @@ namespace cv { namespace gpu { namespace cudev
         cudaSafeCall( cudaDeviceSynchronize() );
     }
 
-    template void kernelDownsampleX2_gpu<uchar1>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void kernelDownsampleX2_gpu<uchar3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void kernelDownsampleX2_gpu<uchar4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+    void downsampleX2(PtrStepSzb src, PtrStepSzb dst, int depth, int cn, cudaStream_t stream)
+    {
+        typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
 
-    template void kernelDownsampleX2_gpu<ushort1>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void kernelDownsampleX2_gpu<ushort3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void kernelDownsampleX2_gpu<ushort4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+        static const func_t funcs[6][4] =
+        {
+            {kernelDownsampleX2_gpu<uchar1>       , 0 /*kernelDownsampleX2_gpu<uchar2>*/ , kernelDownsampleX2_gpu<uchar3>      , kernelDownsampleX2_gpu<uchar4>      },
+            {0 /*kernelDownsampleX2_gpu<char1>*/  , 0 /*kernelDownsampleX2_gpu<char2>*/  , 0 /*kernelDownsampleX2_gpu<char3>*/ , 0 /*kernelDownsampleX2_gpu<char4>*/ },
+            {kernelDownsampleX2_gpu<ushort1>      , 0 /*kernelDownsampleX2_gpu<ushort2>*/, kernelDownsampleX2_gpu<ushort3>     , kernelDownsampleX2_gpu<ushort4>     },
+            {0 /*kernelDownsampleX2_gpu<short1>*/ , 0 /*kernelDownsampleX2_gpu<short2>*/ , 0 /*kernelDownsampleX2_gpu<short3>*/, 0 /*kernelDownsampleX2_gpu<short4>*/},
+            {0 /*kernelDownsampleX2_gpu<int1>*/   , 0 /*kernelDownsampleX2_gpu<int2>*/   , 0 /*kernelDownsampleX2_gpu<int3>*/  , 0 /*kernelDownsampleX2_gpu<int4>*/  },
+            {kernelDownsampleX2_gpu<float1>       , 0 /*kernelDownsampleX2_gpu<float2>*/ , kernelDownsampleX2_gpu<float3>      , kernelDownsampleX2_gpu<float4>      }
+        };
 
-    template void kernelDownsampleX2_gpu<float1>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void kernelDownsampleX2_gpu<float3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void kernelDownsampleX2_gpu<float4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+        const func_t func = funcs[depth][cn - 1];
+        CV_Assert(func != 0);
+
+        func(src, dst, stream);
+    }
 }
 }}}
@@ -298,17 +306,25 @@ namespace cv { namespace gpu { namespace cudev
         cudaSafeCall( cudaDeviceSynchronize() );
     }
 
-    template void kernelInterpolateFrom1_gpu<uchar1>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void kernelInterpolateFrom1_gpu<uchar3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void kernelInterpolateFrom1_gpu<uchar4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+    void interpolateFrom1(PtrStepSzb src, PtrStepSzb dst, int depth, int cn, cudaStream_t stream)
+    {
+        typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
 
-    template void kernelInterpolateFrom1_gpu<ushort1>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void kernelInterpolateFrom1_gpu<ushort3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void kernelInterpolateFrom1_gpu<ushort4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+        static const func_t funcs[6][4] =
+        {
+            {kernelInterpolateFrom1_gpu<uchar1>       , 0 /*kernelInterpolateFrom1_gpu<uchar2>*/ , kernelInterpolateFrom1_gpu<uchar3>      , kernelInterpolateFrom1_gpu<uchar4>      },
+            {0 /*kernelInterpolateFrom1_gpu<char1>*/  , 0 /*kernelInterpolateFrom1_gpu<char2>*/  , 0 /*kernelInterpolateFrom1_gpu<char3>*/ , 0 /*kernelInterpolateFrom1_gpu<char4>*/ },
+            {kernelInterpolateFrom1_gpu<ushort1>      , 0 /*kernelInterpolateFrom1_gpu<ushort2>*/, kernelInterpolateFrom1_gpu<ushort3>     , kernelInterpolateFrom1_gpu<ushort4>     },
+            {0 /*kernelInterpolateFrom1_gpu<short1>*/ , 0 /*kernelInterpolateFrom1_gpu<short2>*/ , 0 /*kernelInterpolateFrom1_gpu<short3>*/, 0 /*kernelInterpolateFrom1_gpu<short4>*/},
+            {0 /*kernelInterpolateFrom1_gpu<int1>*/   , 0 /*kernelInterpolateFrom1_gpu<int2>*/   , 0 /*kernelInterpolateFrom1_gpu<int3>*/  , 0 /*kernelInterpolateFrom1_gpu<int4>*/  },
+            {kernelInterpolateFrom1_gpu<float1>       , 0 /*kernelInterpolateFrom1_gpu<float2>*/ , kernelInterpolateFrom1_gpu<float3>      , kernelInterpolateFrom1_gpu<float4>      }
+        };
 
-    template void kernelInterpolateFrom1_gpu<float1>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void kernelInterpolateFrom1_gpu<float3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void kernelInterpolateFrom1_gpu<float4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+        const func_t func = funcs[depth][cn - 1];
+        CV_Assert(func != 0);
+
+        func(src, dst, stream);
+    }
 }
 }}}
diff --git a/modules/gpulegacy/test/main_nvidia.cpp b/modules/gpulegacy/test/main_nvidia.cpp
index 1179b5b96..0c82a1abf 100644
--- a/modules/gpulegacy/test/main_nvidia.cpp
+++ b/modules/gpulegacy/test/main_nvidia.cpp
@@ -349,7 +349,7 @@ bool nvidia_NPPST_Resize(const std::string& test_data_path, OutputLevel outputLe
     NCVAutoTestLister testListerResize("NPPST Resize", outputLevel);
 
     NCVTestSourceProvider<Ncv32u> testSrcRandom_32u(2010, 0, 0xFFFFFFFF, 2048, 2048);
-    NCVTestSourceProvider<Ncv64u> testSrcRandom_64u(2010, 0, -1, 2048, 2048);
+    NCVTestSourceProvider<Ncv64u> testSrcRandom_64u(2010, 0, (Ncv64u) -1, 2048, 2048);
 
     generateResizeTests(testListerResize, testSrcRandom_32u);
     generateResizeTests(testListerResize, testSrcRandom_64u);
@@ -379,7 +379,7 @@ bool nvidia_NPPST_Transpose(const std::string& test_data_path, OutputLevel outpu
     NCVAutoTestLister testListerTranspose("NPPST Transpose", outputLevel);
 
     NCVTestSourceProvider<Ncv32u> testSrcRandom_32u(2010, 0, 0xFFFFFFFF, 2048, 2048);
-    NCVTestSourceProvider<Ncv64u> testSrcRandom_64u(2010, 0, -1, 2048, 2048);
+    NCVTestSourceProvider<Ncv64u> testSrcRandom_64u(2010, 0, (Ncv64u) -1, 2048, 2048);
 
     generateTransposeTests(testListerTranspose, testSrcRandom_32u);
     generateTransposeTests(testListerTranspose, testSrcRandom_64u);
diff --git a/modules/gpuwarping/src/pyramids.cpp b/modules/gpuwarping/src/pyramids.cpp
index 91b568d70..db9dd611a 100644
--- a/modules/gpuwarping/src/pyramids.cpp
+++ b/modules/gpuwarping/src/pyramids.cpp
@@ -140,25 +140,8 @@ void cv::gpu::ImagePyramid::build(const GpuMat& img, int numLayers, Stream& stre
     (void) stream;
     throw_no_cuda();
 #else
-    using namespace cv::gpu::cudev::pyramid;
-
-    typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-
-    static const func_t funcs[6][4] =
-    {
-        {kernelDownsampleX2_gpu<uchar1>       , 0 /*kernelDownsampleX2_gpu<uchar2>*/ , kernelDownsampleX2_gpu<uchar3>      , kernelDownsampleX2_gpu<uchar4>      },
-        {0 /*kernelDownsampleX2_gpu<char1>*/  , 0 /*kernelDownsampleX2_gpu<char2>*/  , 0 /*kernelDownsampleX2_gpu<char3>*/ , 0 /*kernelDownsampleX2_gpu<char4>*/ },
-        {kernelDownsampleX2_gpu<ushort1>      , 0 /*kernelDownsampleX2_gpu<ushort2>*/, kernelDownsampleX2_gpu<ushort3>     , kernelDownsampleX2_gpu<ushort4>     },
-        {0 /*kernelDownsampleX2_gpu<short1>*/ , 0 /*kernelDownsampleX2_gpu<short2>*/ , 0 /*kernelDownsampleX2_gpu<short3>*/, 0 /*kernelDownsampleX2_gpu<short4>*/},
-        {0 /*kernelDownsampleX2_gpu<int1>*/   , 0 /*kernelDownsampleX2_gpu<int2>*/   , 0 /*kernelDownsampleX2_gpu<int3>*/  , 0 /*kernelDownsampleX2_gpu<int4>*/  },
-        {kernelDownsampleX2_gpu<float1>       , 0 /*kernelDownsampleX2_gpu<float2>*/ , kernelDownsampleX2_gpu<float3>      , kernelDownsampleX2_gpu<float4>      }
-    };
-
     CV_Assert(img.depth() <= CV_32F && img.channels() <= 4);
 
-    const func_t func = funcs[img.depth()][img.channels() - 1];
-    CV_Assert(func != 0);
-
     layer0_ = img;
     Size szLastLayer = img.size();
     nLayers_ = 1;
@@ -180,7 +163,7 @@ void cv::gpu::ImagePyramid::build(const GpuMat& img, int numLayers, Stream& stre
 
         const GpuMat& prevLayer = i == 0 ? layer0_ : pyramid_[i - 1];
 
-        func(prevLayer, pyramid_[i], StreamAccessor::getStream(stream));
+        cudev::pyramid::downsampleX2(prevLayer, pyramid_[i], img.depth(), img.channels(), StreamAccessor::getStream(stream));
 
         szLastLayer = szCurLayer;
     }
@@ -195,27 +178,10 @@ void cv::gpu::ImagePyramid::getLayer(GpuMat& outImg, Size outRoi, Stream& stream
     (void) stream;
     throw_no_cuda();
 #else
-    using namespace cv::gpu::cudev::pyramid;
-
-    typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-
-    static const func_t funcs[6][4] =
-    {
-        {kernelInterpolateFrom1_gpu<uchar1>       , 0 /*kernelInterpolateFrom1_gpu<uchar2>*/ , kernelInterpolateFrom1_gpu<uchar3>      , kernelInterpolateFrom1_gpu<uchar4>      },
-        {0 /*kernelInterpolateFrom1_gpu<char1>*/  , 0 /*kernelInterpolateFrom1_gpu<char2>*/  , 0 /*kernelInterpolateFrom1_gpu<char3>*/ , 0 /*kernelInterpolateFrom1_gpu<char4>*/ },
-        {kernelInterpolateFrom1_gpu<ushort1>      , 0 /*kernelInterpolateFrom1_gpu<ushort2>*/, kernelInterpolateFrom1_gpu<ushort3>     , kernelInterpolateFrom1_gpu<ushort4>     },
-        {0 /*kernelInterpolateFrom1_gpu<short1>*/ , 0 /*kernelInterpolateFrom1_gpu<short2>*/ , 0 /*kernelInterpolateFrom1_gpu<short3>*/, 0 /*kernelInterpolateFrom1_gpu<short4>*/},
-        {0 /*kernelInterpolateFrom1_gpu<int1>*/   , 0 /*kernelInterpolateFrom1_gpu<int2>*/   , 0 /*kernelInterpolateFrom1_gpu<int3>*/  , 0 /*kernelInterpolateFrom1_gpu<int4>*/  },
-        {kernelInterpolateFrom1_gpu<float1>       , 0 /*kernelInterpolateFrom1_gpu<float2>*/ , kernelInterpolateFrom1_gpu<float3>      , kernelInterpolateFrom1_gpu<float4>      }
-    };
-
     CV_Assert(outRoi.width <= layer0_.cols && outRoi.height <= layer0_.rows && outRoi.width > 0 && outRoi.height > 0);
 
     ensureSizeIsEnough(outRoi, layer0_.type(), outImg);
 
-    const func_t func = funcs[outImg.depth()][outImg.channels() - 1];
-    CV_Assert(func != 0);
-
     if (outRoi.width == layer0_.cols && outRoi.height == layer0_.rows)
     {
         if (stream)
@@ -249,7 +215,7 @@ void cv::gpu::ImagePyramid::getLayer(GpuMat& outImg, Size outRoi, Stream& stream
         lastLayer = curLayer;
     }
 
-    func(lastLayer, outImg, StreamAccessor::getStream(stream));
+    cudev::pyramid::interpolateFrom1(lastLayer, outImg, outImg.depth(), outImg.channels(), StreamAccessor::getStream(stream));
 
 #endif
 }