implemented asynchronous call for StereoBM()

2010-07-27 08:29:40 +00:00 · 2010-07-27 08:29:40 +00:00 · 12dc52c2e7
commit 12dc52c2e7
parent dc0f313924
4 changed files with 116 additions and 99 deletions
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@ -349,7 +349,7 @@ namespace cv
            void operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity);

            //! Async version
-            void operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const CudaStream& stream);
+            void operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const CudaStream & stream);

            //! Some heuristics that try to estimate
            // whether the current GPU will be faster than the CPU for this algorithm.
--- a/modules/gpu/src/cuda/stereobm.cu
+++ b/modules/gpu/src/cuda/stereobm.cu
@ -311,7 +311,7 @@ __global__ void stereoKernel(unsigned char *left, unsigned char *right, size_t i

 namespace cv { namespace gpu { namespace impl
 {
-    template<int RADIUS> void kernel_caller(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int maxdisp)
+    template<int RADIUS> void kernel_caller(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int maxdisp, const cudaStream_t & stream)
    {
        dim3 grid(1,1,1);
        dim3 threads(BLOCK_W, 1, 1);
@ -322,11 +322,19 @@ namespace cv { namespace gpu { namespace impl
        //See above:  #define COL_SSD_SIZE (BLOCK_W + 2 * RADIUS)
        size_t smem_size = (BLOCK_W + N_DISPARITIES * (BLOCK_W + 2 * RADIUS)) * sizeof(unsigned int);

-        stereobm_gpu::stereoKernel<RADIUS><<<grid, threads, smem_size>>>(left.ptr, right.ptr, left.step, disp.ptr, disp.step, maxdisp);
-        cudaSafeCall( cudaThreadSynchronize() );
+        if (stream == 0)
+        {
+            stereobm_gpu::stereoKernel<RADIUS><<<grid, threads, smem_size>>>(left.ptr, right.ptr, left.step, disp.ptr, disp.step, maxdisp);
+            cudaSafeCall( cudaThreadSynchronize() );
+        }
+        else
+        {
+            stereobm_gpu::stereoKernel<RADIUS><<<grid, threads, smem_size, stream>>>(left.ptr, right.ptr, left.step, disp.ptr, disp.step, maxdisp);
+        }
+
    };

-    typedef void (*kernel_caller_t)(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int maxdisp);
+    typedef void (*kernel_caller_t)(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int maxdisp, const cudaStream_t & stream);

    const static kernel_caller_t callers[] =
    {
@ -341,7 +349,7 @@ namespace cv { namespace gpu { namespace impl
    };
    const int calles_num = sizeof(callers)/sizeof(callers[0]);

-    extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int maxdisp, int winsz, const DevMem2D_<unsigned int>& minSSD_buf)
+    extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int maxdisp, int winsz, const DevMem2D_<unsigned int>& minSSD_buf, const cudaStream_t & stream)
    {
        int winsz2 = winsz >> 1;

@ -361,7 +369,7 @@ namespace cv { namespace gpu { namespace impl
        size_t minssd_step = minSSD_buf.step/minSSD_buf.elemSize();
        cudaSafeCall( cudaMemcpyToSymbol( stereobm_gpu::cminSSD_step,  &minssd_step, sizeof(minssd_step) ) );

-        callers[winsz2](left, right, disp, maxdisp);                                
+        callers[winsz2](left, right, disp, maxdisp, stream);
    }
 }}}

--- a/modules/gpu/src/precomp.hpp
+++ b/modules/gpu/src/precomp.hpp
@ -54,6 +54,7 @@
 #include <limits>

 #include "opencv2/gpu/gpu.hpp"
+#include "opencv2/gpu/stream_accessor.hpp"


 #if defined(HAVE_CUDA)
--- a/modules/gpu/src/stereobm_gpu.cpp
+++ b/modules/gpu/src/stereobm_gpu.cpp
@ -60,7 +60,8 @@ namespace cv { namespace gpu
 {
    namespace impl
    {
-        extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int ndisp, int winsz, const DevMem2D_<uint>& minSSD_buf);
+        //extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int ndisp, int winsz, const DevMem2D_<uint>& minSSD_buf);
+        extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int ndisp, int winsz, const DevMem2D_<uint>& minSSD_buf, const cudaStream_t & stream);
        extern "C" void prefilter_xsobel(const DevMem2D& input, const DevMem2D& output, int prefilterCap = 31);
        extern "C" void postfilter_textureness(const DevMem2D& input, int winsz, float avergeTexThreshold, const DevMem2D& disp);
    }
@ -97,7 +98,7 @@ bool cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable()
    return false;
 }

-void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity)
+void stereo_gpu_operator ( GpuMat& minSSD,  GpuMat& leBuf, GpuMat&  riBuf,  int preset, int ndisp, int winSize, float avergeTexThreshold, const GpuMat& left, const GpuMat& right, GpuMat& disparity, const cudaStream_t & stream)
 {
    CV_DbgAssert(left.rows == right.rows && left.cols == right.cols);
    CV_DbgAssert(left.type() == CV_8UC1);
@ -109,7 +110,7 @@ void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right
    GpuMat le_for_bm =  left;
    GpuMat ri_for_bm = right;

-    if (preset == PREFILTER_XSOBEL)
+    if (preset == StereoBM_GPU::PREFILTER_XSOBEL)
    {
        leBuf.create( left.size(),  left.type());
        riBuf.create(right.size(), right.type());
@ -120,15 +121,22 @@ void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right
        le_for_bm = leBuf;
        ri_for_bm = riBuf;
    }
-    impl::stereoBM_GPU(le_for_bm, ri_for_bm, disparity, ndisp, winSize, minSSD);    
+
+    impl::stereoBM_GPU(le_for_bm, ri_for_bm, disparity, ndisp, winSize, minSSD, stream);

    if (avergeTexThreshold)
        impl::postfilter_textureness(le_for_bm, winSize, avergeTexThreshold, disparity);
 }

+
+void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity)
+{   // Overload without a stream argument: forwards this object's buffers/settings plus left, right and disparity to stereo_gpu_operator.
+    ::stereo_gpu_operator(minSSD, leBuf, riBuf, preset, ndisp, winSize, avergeTexThreshold, left, right, disparity, 0);   // stream == 0: kernel_caller launches without a stream and then calls cudaThreadSynchronize()
+}
+
 void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const CudaStream& stream)
 {   // Stream overload: same forwarding as the overload without a stream, but passes the stream obtained via StreamAccessor::getStream(stream).
-    CV_Assert(!"Not implemented");
+    ::stereo_gpu_operator(minSSD, leBuf, riBuf, preset, ndisp, winSize, avergeTexThreshold, left, right, disparity, StreamAccessor::getStream(stream));   // NOTE(review): in the visible code only impl::stereoBM_GPU receives the stream; postfilter_textureness is called without one - confirm this is intended
 }

 #endif /* !defined (HAVE_CUDA) */