From d1fc3e6b5aacdf87435c478938b90c44e323464c Mon Sep 17 00:00:00 2001 From: Anatoly Baksheev Date: Tue, 10 Aug 2010 09:44:50 +0000 Subject: [PATCH] cv::gpu::CudaStream -> cv::gpu::Stream some refactoring added gpu module to compilation --- OpenCVConfig.cmake.in | 2 +- modules/CMakeLists.txt | 2 +- modules/gpu/include/opencv2/gpu/gpu.hpp | 36 +++++----- .../include/opencv2/gpu/stream_accessor.hpp | 2 +- modules/gpu/src/beliefpropagation_gpu.cpp | 4 +- modules/gpu/src/cuda/imgproc.cu | 67 ++++++++++-------- modules/gpu/src/cudastream.cpp | 70 +++++++++---------- modules/gpu/src/matrix_operations.cpp | 4 +- modules/gpu/src/stereobm_gpu.cpp | 4 +- tests/gpu/src/operator_async_call.cpp | 2 +- 10 files changed, 101 insertions(+), 92 deletions(-) diff --git a/OpenCVConfig.cmake.in b/OpenCVConfig.cmake.in index 812711fd9..e6ed61bf3 100644 --- a/OpenCVConfig.cmake.in +++ b/OpenCVConfig.cmake.in @@ -43,7 +43,7 @@ SET(OpenCV_LIB_DIR "@CMAKE_LIB_DIRS_CONFIGCMAKE@") # ==================================================================== # Link libraries: e.g. opencv_core220.so, opencv_imgproc220d.lib, etc... # ==================================================================== -set(OPENCV_LIB_COMPONENTS opencv_core opencv_imgproc opencv_features2d opencv_calib3d opencv_objdetect opencv_video opencv_highgui opencv_ml opencv_legacy opencv_contrib) +set(OPENCV_LIB_COMPONENTS opencv_core opencv_imgproc opencv_features2d opencv_gpu opencv_calib3d opencv_objdetect opencv_video opencv_highgui opencv_ml opencv_legacy opencv_contrib) SET(OpenCV_LIBS "") foreach(__CVLIB ${OPENCV_LIB_COMPONENTS}) # CMake>=2.6 supports the notation "debug XXd optimized XX" diff --git a/modules/CMakeLists.txt b/modules/CMakeLists.txt index 0a6df840d..a9e3eb932 100644 --- a/modules/CMakeLists.txt +++ b/modules/CMakeLists.txt @@ -24,4 +24,4 @@ add_subdirectory(haartraining) add_subdirectory(traincascade) -#add_subdirectory(gpu) +add_subdirectory(gpu) diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index 41daf0766..f389f8fd5 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -67,7 +67,7 @@ namespace cv CV_EXPORTS void getGpuMemInfo(size_t *free, size_t* total); //////////////////////////////// GpuMat //////////////////////////////// - class CudaStream; + class Stream; class MatPL; //! Smart pointer for GPU memory with reference counting. Its interface is mostly similar with cv::Mat. @@ -111,12 +111,12 @@ namespace cv //! pefroms blocking upload data to GpuMat. . void upload(const cv::Mat& m); - void upload(const MatPL& m, CudaStream& stream); + void upload(const MatPL& m, Stream& stream); //! Downloads data from device to host memory. Blocking calls. operator Mat() const; void download(cv::Mat& m) const; - void download(MatPL& m, CudaStream& stream) const; + void download(MatPL& m, Stream& stream) const; //! returns a new GpuMatrix header for the specified row GpuMat row(int y) const; @@ -291,14 +291,14 @@ namespace cv // Passed to each function that supports async kernel execution. // Reference counting is enabled - class CV_EXPORTS CudaStream + class CV_EXPORTS Stream { public: - CudaStream(); - ~CudaStream(); + Stream(); + ~Stream(); - CudaStream(const CudaStream&); - CudaStream& operator=(const CudaStream&); + Stream(const Stream&); + Stream& operator=(const Stream&); bool queryIfComplete(); void waitForCompletion(); @@ -355,7 +355,7 @@ namespace cv void operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity); //! Acync version - void operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const CudaStream & stream); + void operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const Stream & stream); //! Some heuristics that tries to estmate // if current GPU will be faster then CPU in this algorithm. @@ -390,18 +390,18 @@ namespace cv enum { DEFAULT_LEVELS = 5 }; //! the default constructor - explicit StereoBeliefPropagation_GPU(int ndisp_ = DEFAULT_NDISP, - int iters_ = DEFAULT_ITERS, - int levels_ = DEFAULT_LEVELS, - int msg_type_ = MSG_TYPE_AUTO, + explicit StereoBeliefPropagation_GPU(int ndisp = DEFAULT_NDISP, + int iters = DEFAULT_ITERS, + int levels = DEFAULT_LEVELS, + int msg_type = MSG_TYPE_AUTO, float msg_scale = 1.0f); //! the full constructor taking the number of disparities, number of BP iterations on each level, //! number of levels, truncation of data cost, data weight, //! truncation of discontinuity cost and discontinuity single jump - StereoBeliefPropagation_GPU(int ndisp_, int iters_, int levels_, - float max_data_term_, float data_weight_, - float max_disc_term_, float disc_single_jump_, - int msg_type_ = MSG_TYPE_AUTO, + StereoBeliefPropagation_GPU(int ndisp, int iters, int levels, + float max_data_term, float data_weight, + float max_disc_term, float disc_single_jump, + int msg_type = MSG_TYPE_AUTO, float msg_scale = 1.0f); //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair, @@ -409,7 +409,7 @@ namespace cv void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity); //! Acync version - void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, const CudaStream& stream); + void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, const Stream& stream); //! Some heuristics that tries to estmate //! if current GPU will be faster then CPU in this algorithm. diff --git a/modules/gpu/include/opencv2/gpu/stream_accessor.hpp b/modules/gpu/include/opencv2/gpu/stream_accessor.hpp index 389b7cd55..cd92ca0f7 100644 --- a/modules/gpu/include/opencv2/gpu/stream_accessor.hpp +++ b/modules/gpu/include/opencv2/gpu/stream_accessor.hpp @@ -56,7 +56,7 @@ namespace cv // In this case you have to install Cuda Toolkit. struct StreamAccessor { - CV_EXPORTS static cudaStream_t getStream(const CudaStream& stream); + CV_EXPORTS static cudaStream_t getStream(const Stream& stream); }; } } diff --git a/modules/gpu/src/beliefpropagation_gpu.cpp b/modules/gpu/src/beliefpropagation_gpu.cpp index 3ebf246c3..6d1a20250 100644 --- a/modules/gpu/src/beliefpropagation_gpu.cpp +++ b/modules/gpu/src/beliefpropagation_gpu.cpp @@ -52,7 +52,7 @@ cv::gpu::StereoBeliefPropagation_GPU::StereoBeliefPropagation_GPU(int, int, int, cv::gpu::StereoBeliefPropagation_GPU::StereoBeliefPropagation_GPU(int, int, int, float, float, float, float, int, float) { throw_nogpu(); } void cv::gpu::StereoBeliefPropagation_GPU::operator()(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); } -void cv::gpu::StereoBeliefPropagation_GPU::operator()(const GpuMat&, const GpuMat&, GpuMat&, const CudaStream&) { throw_nogpu(); } +void cv::gpu::StereoBeliefPropagation_GPU::operator()(const GpuMat&, const GpuMat&, GpuMat&, const Stream&) { throw_nogpu(); } bool cv::gpu::StereoBeliefPropagation_GPU::checkIfGpuCallReasonable() { throw_nogpu(); return false; } @@ -282,7 +282,7 @@ void cv::gpu::StereoBeliefPropagation_GPU::operator()(const GpuMat& left, const ::stereo_bp_gpu_operator(ndisp, iters, levels, max_data_term, data_weight, max_disc_term, disc_single_jump, msg_type, msg_scale, u, d, l, r, u2, d2, l2, r2, datas, out, left, right, disp, 0); } -void cv::gpu::StereoBeliefPropagation_GPU::operator()(const GpuMat& left, const GpuMat& right, GpuMat& disp, const CudaStream& stream) +void cv::gpu::StereoBeliefPropagation_GPU::operator()(const GpuMat& left, const GpuMat& right, GpuMat& disp, const Stream& stream) { ::stereo_bp_gpu_operator(ndisp, iters, levels, max_data_term, data_weight, max_disc_term, disc_single_jump, msg_type, msg_scale, u, d, l, r, u2, d2, l2, r2, datas, out, left, right, disp, StreamAccessor::getStream(stream)); } diff --git a/modules/gpu/src/cuda/imgproc.cu b/modules/gpu/src/cuda/imgproc.cu index b475a22b6..a3ec30208 100644 --- a/modules/gpu/src/cuda/imgproc.cu +++ b/modules/gpu/src/cuda/imgproc.cu @@ -44,9 +44,11 @@ using namespace cv::gpu; + +/////////////////////////////////// Remap /////////////////////////////////////////////// namespace imgproc { - texture tex1; + texture tex_remap; __global__ void kernel_remap(const float *mapx, const float *mapy, size_t map_step, unsigned char* out, size_t out_step, int width, int height) { @@ -58,12 +60,40 @@ namespace imgproc float xcoo = mapx[idx]; float ycoo = mapy[idx]; - - out[y * out_step + x] = (unsigned char)(255.f * tex2D(tex1, xcoo, ycoo)); + + out[y * out_step + x] = (unsigned char)(255.f * tex2D(tex_remap, xcoo, ycoo)); } } - texture< uchar4, 2, cudaReadModeElementType > tex_meanshift; +} + +namespace cv { namespace gpu { namespace impl +{ + extern "C" void remap_gpu(const DevMem2D& src, const DevMem2D_& xmap, const DevMem2D_& ymap, DevMem2D dst) + { + dim3 block(16, 16, 1); + dim3 grid(1, 1, 1); + grid.x = divUp(dst.cols, block.x); + grid.y = divUp(dst.rows, block.y); + + imgproc::tex_remap.filterMode = cudaFilterModeLinear; + imgproc::tex_remap.addressMode[0] = imgproc::tex_remap.addressMode[1] = cudaAddressModeWrap; + cudaChannelFormatDesc desc = cudaCreateChannelDesc(); + cudaSafeCall( cudaBindTexture2D(0, imgproc::tex_remap, src.ptr, desc, dst.cols, dst.rows, src.step) ); + + imgproc::kernel_remap<<>>(xmap.ptr, ymap.ptr, xmap.step, dst.ptr, dst.step, dst.cols, dst.rows); + + cudaSafeCall( cudaThreadSynchronize() ); + cudaSafeCall( cudaUnbindTexture(imgproc::tex_remap) ); + } +}}} + + +/////////////////////////////////// MeanShiftfiltering /////////////////////////////////////////////// + +namespace imgproc +{ + texture tex_meanshift; extern "C" __global__ void meanshift_kernel( unsigned char* out, int out_step, int cols, int rows, int sp, int sr, int maxIter, float eps ) { @@ -72,9 +102,8 @@ namespace imgproc if( x0 < cols && y0 < rows ) { - int isr2 = sr*sr; - uchar4 c = tex2D( tex_meanshift, x0, y0 ); + uchar4 c = tex2D(tex_meanshift, x0, y0 ); // iterate meanshift procedure for( int iter = 0; iter < maxIter; iter++ ) { @@ -137,26 +166,6 @@ namespace imgproc namespace cv { namespace gpu { namespace impl { - using namespace imgproc; - - extern "C" void remap_gpu(const DevMem2D& src, const DevMem2D_& xmap, const DevMem2D_& ymap, DevMem2D dst) - { - dim3 block(16, 16, 1); - dim3 grid(1, 1, 1); - grid.x = divUp(dst.cols, block.x); - grid.y = divUp(dst.rows, block.y); - - tex1.filterMode = cudaFilterModeLinear; - tex1.addressMode[0] = tex1.addressMode[1] = cudaAddressModeWrap; - cudaChannelFormatDesc desc = cudaCreateChannelDesc(); - cudaSafeCall( cudaBindTexture2D(0, tex1, src.ptr, desc, dst.cols, dst.rows, src.step) ); - - kernel_remap<<>>(xmap.ptr, ymap.ptr, xmap.step, dst.ptr, dst.step, dst.cols, dst.rows); - - cudaSafeCall( cudaThreadSynchronize() ); - cudaSafeCall( cudaUnbindTexture(tex1) ); - } - extern "C" void meanShiftFiltering_gpu(const DevMem2D& src, DevMem2D dst, float sp, float sr, int maxIter, float eps) { dim3 grid(1, 1, 1); @@ -165,11 +174,11 @@ namespace cv { namespace gpu { namespace impl grid.y = divUp(src.rows, threads.y); cudaChannelFormatDesc desc = cudaCreateChannelDesc(); - cudaSafeCall( cudaBindTexture2D( 0, tex_meanshift, src.ptr, desc, src.cols, src.rows, src.step ) ); + cudaSafeCall( cudaBindTexture2D( 0, imgproc::tex_meanshift, src.ptr, desc, src.cols, src.rows, src.step ) ); - meanshift_kernel<<< grid, threads >>>( dst.ptr, dst.step, dst.cols, dst.rows, sp, sr, maxIter, eps ); + imgproc::meanshift_kernel<<< grid, threads >>>( dst.ptr, dst.step, dst.cols, dst.rows, sp, sr, maxIter, eps ); cudaSafeCall( cudaThreadSynchronize() ); - cudaSafeCall( cudaUnbindTexture( tex_meanshift ) ); + cudaSafeCall( cudaUnbindTexture( imgproc::tex_meanshift ) ); } }}} diff --git a/modules/gpu/src/cudastream.cpp b/modules/gpu/src/cudastream.cpp index 560319764..0f9647324 100644 --- a/modules/gpu/src/cudastream.cpp +++ b/modules/gpu/src/cudastream.cpp @@ -48,28 +48,28 @@ using namespace cv::gpu; #if !defined (HAVE_CUDA) -void cv::gpu::CudaStream::create() { throw_nogpu(); } -void cv::gpu::CudaStream::release() { throw_nogpu(); } -cv::gpu::CudaStream::CudaStream() : impl(0) { throw_nogpu(); } -cv::gpu::CudaStream::~CudaStream() { throw_nogpu(); } -cv::gpu::CudaStream::CudaStream(const CudaStream& /*stream*/) { throw_nogpu(); } -CudaStream& cv::gpu::CudaStream::operator=(const CudaStream& /*stream*/) { throw_nogpu(); return *this; } -bool cv::gpu::CudaStream::queryIfComplete() { throw_nogpu(); return true; } -void cv::gpu::CudaStream::waitForCompletion() { throw_nogpu(); } -void cv::gpu::CudaStream::enqueueDownload(const GpuMat& /*src*/, Mat& /*dst*/) { throw_nogpu(); } -void cv::gpu::CudaStream::enqueueDownload(const GpuMat& /*src*/, MatPL& /*dst*/) { throw_nogpu(); } -void cv::gpu::CudaStream::enqueueUpload(const MatPL& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); } -void cv::gpu::CudaStream::enqueueUpload(const Mat& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); } -void cv::gpu::CudaStream::enqueueCopy(const GpuMat& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); } -void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& /*src*/, Scalar /*val*/) { throw_nogpu(); } -void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& /*src*/, Scalar /*val*/, const GpuMat& /*mask*/) { throw_nogpu(); } -void cv::gpu::CudaStream::enqueueConvert(const GpuMat& /*src*/, GpuMat& /*dst*/, int /*type*/, double /*a*/, double /*b*/) { throw_nogpu(); } +void cv::gpu::Stream::create() { throw_nogpu(); } +void cv::gpu::Stream::release() { throw_nogpu(); } +cv::gpu::Stream::Stream() : impl(0) { throw_nogpu(); } +cv::gpu::Stream::~Stream() { throw_nogpu(); } +cv::gpu::Stream::Stream(const Stream& /*stream*/) { throw_nogpu(); } +Stream& cv::gpu::Stream::operator=(const Stream& /*stream*/) { throw_nogpu(); return *this; } +bool cv::gpu::Stream::queryIfComplete() { throw_nogpu(); return true; } +void cv::gpu::Stream::waitForCompletion() { throw_nogpu(); } +void cv::gpu::Stream::enqueueDownload(const GpuMat& /*src*/, Mat& /*dst*/) { throw_nogpu(); } +void cv::gpu::Stream::enqueueDownload(const GpuMat& /*src*/, MatPL& /*dst*/) { throw_nogpu(); } +void cv::gpu::Stream::enqueueUpload(const MatPL& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); } +void cv::gpu::Stream::enqueueUpload(const Mat& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); } +void cv::gpu::Stream::enqueueCopy(const GpuMat& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); } +void cv::gpu::Stream::enqueueMemSet(const GpuMat& /*src*/, Scalar /*val*/) { throw_nogpu(); } +void cv::gpu::Stream::enqueueMemSet(const GpuMat& /*src*/, Scalar /*val*/, const GpuMat& /*mask*/) { throw_nogpu(); } +void cv::gpu::Stream::enqueueConvert(const GpuMat& /*src*/, GpuMat& /*dst*/, int /*type*/, double /*a*/, double /*b*/) { throw_nogpu(); } #else /* !defined (HAVE_CUDA) */ #include "opencv2/gpu/stream_accessor.hpp" -struct CudaStream::Impl +struct Stream::Impl { cudaStream_t stream; int ref_counter; @@ -85,9 +85,9 @@ namespace }; } -CV_EXPORTS cudaStream_t cv::gpu::StreamAccessor::getStream(const CudaStream& stream) { return stream.impl->stream; }; +CV_EXPORTS cudaStream_t cv::gpu::StreamAccessor::getStream(const Stream& stream) { return stream.impl->stream; }; -void cv::gpu::CudaStream::create() +void cv::gpu::Stream::create() { if (impl) release(); @@ -95,13 +95,13 @@ void cv::gpu::CudaStream::create() cudaStream_t stream; cudaSafeCall( cudaStreamCreate( &stream ) ); - impl = (CudaStream::Impl*)fastMalloc(sizeof(CudaStream::Impl)); + impl = (Stream::Impl*)fastMalloc(sizeof(Stream::Impl)); impl->stream = stream; impl->ref_counter = 1; } -void cv::gpu::CudaStream::release() +void cv::gpu::Stream::release() { if( impl && CV_XADD(&impl->ref_counter, -1) == 1 ) { @@ -110,15 +110,15 @@ void cv::gpu::CudaStream::release() } } -cv::gpu::CudaStream::CudaStream() : impl(0) { create(); } -cv::gpu::CudaStream::~CudaStream() { release(); } +cv::gpu::Stream::Stream() : impl(0) { create(); } +cv::gpu::Stream::~Stream() { release(); } -cv::gpu::CudaStream::CudaStream(const CudaStream& stream) : impl(stream.impl) +cv::gpu::Stream::Stream(const Stream& stream) : impl(stream.impl) { if( impl ) CV_XADD(&impl->ref_counter, 1); } -CudaStream& cv::gpu::CudaStream::operator=(const CudaStream& stream) +Stream& cv::gpu::Stream::operator=(const Stream& stream) { if( this != &stream ) { @@ -131,7 +131,7 @@ CudaStream& cv::gpu::CudaStream::operator=(const CudaStream& stream) return *this; } -bool cv::gpu::CudaStream::queryIfComplete() +bool cv::gpu::Stream::queryIfComplete() { cudaError_t err = cudaStreamQuery( impl->stream ); @@ -142,31 +142,31 @@ bool cv::gpu::CudaStream::queryIfComplete() return false; } -void cv::gpu::CudaStream::waitForCompletion() { cudaSafeCall( cudaStreamSynchronize( impl->stream ) ); } +void cv::gpu::Stream::waitForCompletion() { cudaSafeCall( cudaStreamSynchronize( impl->stream ) ); } -void cv::gpu::CudaStream::enqueueDownload(const GpuMat& src, Mat& dst) +void cv::gpu::Stream::enqueueDownload(const GpuMat& src, Mat& dst) { // if not -> allocation will be done, but after that dst will not point to page locked memory CV_Assert(src.cols == dst.cols && src.rows == dst.rows && src.type() == dst.type() ) devcopy(src, dst, impl->stream, cudaMemcpyDeviceToHost); } -void cv::gpu::CudaStream::enqueueDownload(const GpuMat& src, MatPL& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToHost); } +void cv::gpu::Stream::enqueueDownload(const GpuMat& src, MatPL& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToHost); } -void cv::gpu::CudaStream::enqueueUpload(const MatPL& src, GpuMat& dst){ devcopy(src, dst, impl->stream, cudaMemcpyHostToDevice); } -void cv::gpu::CudaStream::enqueueUpload(const Mat& src, GpuMat& dst) { devcopy(src, dst, impl->stream, cudaMemcpyHostToDevice); } -void cv::gpu::CudaStream::enqueueCopy(const GpuMat& src, GpuMat& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToDevice); } +void cv::gpu::Stream::enqueueUpload(const MatPL& src, GpuMat& dst){ devcopy(src, dst, impl->stream, cudaMemcpyHostToDevice); } +void cv::gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst) { devcopy(src, dst, impl->stream, cudaMemcpyHostToDevice); } +void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToDevice); } -void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& src, Scalar val) +void cv::gpu::Stream::enqueueMemSet(const GpuMat& src, Scalar val) { impl::set_to_without_mask(src, src.depth(), val.val, src.channels(), impl->stream); } -void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& src, Scalar val, const GpuMat& mask) +void cv::gpu::Stream::enqueueMemSet(const GpuMat& src, Scalar val, const GpuMat& mask) { impl::set_to_with_mask(src, src.depth(), val.val, mask, src.channels(), impl->stream); } -void cv::gpu::CudaStream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype, double alpha, double beta) +void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype, double alpha, double beta) { bool noScale = fabs(alpha-1) < std::numeric_limits::epsilon() && fabs(beta) < std::numeric_limits::epsilon(); diff --git a/modules/gpu/src/matrix_operations.cpp b/modules/gpu/src/matrix_operations.cpp index 88d2e2145..cdab363d8 100644 --- a/modules/gpu/src/matrix_operations.cpp +++ b/modules/gpu/src/matrix_operations.cpp @@ -82,7 +82,7 @@ void cv::gpu::GpuMat::upload(const Mat& m) cudaSafeCall( cudaMemcpy2D(data, step, m.data, m.step, cols * elemSize(), rows, cudaMemcpyHostToDevice) ); } -void cv::gpu::GpuMat::upload(const MatPL& m, CudaStream& stream) +void cv::gpu::GpuMat::upload(const MatPL& m, Stream& stream) { CV_DbgAssert(!m.empty()); stream.enqueueUpload(m, *this); @@ -95,7 +95,7 @@ void cv::gpu::GpuMat::download(cv::Mat& m) const cudaSafeCall( cudaMemcpy2D(m.data, m.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToHost) ); } -void cv::gpu::GpuMat::download(MatPL& m, CudaStream& stream) const +void cv::gpu::GpuMat::download(MatPL& m, Stream& stream) const { CV_DbgAssert(!m.empty()); stream.enqueueDownload(*this, m); diff --git a/modules/gpu/src/stereobm_gpu.cpp b/modules/gpu/src/stereobm_gpu.cpp index 91d2516e9..2c1f56b6d 100644 --- a/modules/gpu/src/stereobm_gpu.cpp +++ b/modules/gpu/src/stereobm_gpu.cpp @@ -52,7 +52,7 @@ cv::gpu::StereoBM_GPU::StereoBM_GPU(int, int, int) { throw_nogpu(); } bool cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable() { throw_nogpu(); return false; } void cv::gpu::StereoBM_GPU::operator() ( const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); } -void cv::gpu::StereoBM_GPU::operator() ( const GpuMat&, const GpuMat&, GpuMat&, const CudaStream&) { throw_nogpu(); } +void cv::gpu::StereoBM_GPU::operator() ( const GpuMat&, const GpuMat&, GpuMat&, const Stream&) { throw_nogpu(); } #else /* !defined (HAVE_CUDA) */ @@ -134,7 +134,7 @@ void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right ::stereo_bm_gpu_operator(minSSD, leBuf, riBuf, preset, ndisp, winSize, avergeTexThreshold, left, right, disparity, 0); } -void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const CudaStream& stream) +void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const Stream& stream) { ::stereo_bm_gpu_operator(minSSD, leBuf, riBuf, preset, ndisp, winSize, avergeTexThreshold, left, right, disparity, StreamAccessor::getStream(stream)); } diff --git a/tests/gpu/src/operator_async_call.cpp b/tests/gpu/src/operator_async_call.cpp index 17068815d..024622619 100644 --- a/tests/gpu/src/operator_async_call.cpp +++ b/tests/gpu/src/operator_async_call.cpp @@ -58,7 +58,7 @@ bool CV_GpuMatASyncCall::compare_matrix(cv::Mat & cpumat, gpu::GpuMat & gpumat) //int64 time = getTickCount(); - CudaStream stream; + Stream stream; stream.enqueueCopy(gmat0, gmat1); stream.enqueueCopy(gmat0, gmat2); stream.enqueueCopy(gmat0, gmat3);