implemented asynchronous call for StereoBM()

This commit is contained in:
Andrey Morozov 2010-07-27 08:29:40 +00:00
parent dc0f313924
commit 12dc52c2e7
4 changed files with 116 additions and 99 deletions

View File

@ -349,7 +349,7 @@ namespace cv
void operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity);
//! Async version
void operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const CudaStream& stream);
void operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const CudaStream & stream);
//! A heuristic that tries to estimate
// whether the current GPU will be faster than the CPU for this algorithm.

View File

@ -311,7 +311,7 @@ __global__ void stereoKernel(unsigned char *left, unsigned char *right, size_t i
namespace cv { namespace gpu { namespace impl
{
template<int RADIUS> void kernel_caller(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int maxdisp)
template<int RADIUS> void kernel_caller(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int maxdisp, const cudaStream_t & stream)
{
dim3 grid(1,1,1);
dim3 threads(BLOCK_W, 1, 1);
@ -322,11 +322,19 @@ namespace cv { namespace gpu { namespace impl
//See above: #define COL_SSD_SIZE (BLOCK_W + 2 * RADIUS)
size_t smem_size = (BLOCK_W + N_DISPARITIES * (BLOCK_W + 2 * RADIUS)) * sizeof(unsigned int);
stereobm_gpu::stereoKernel<RADIUS><<<grid, threads, smem_size>>>(left.ptr, right.ptr, left.step, disp.ptr, disp.step, maxdisp);
cudaSafeCall( cudaThreadSynchronize() );
if (stream == 0)
{
stereobm_gpu::stereoKernel<RADIUS><<<grid, threads, smem_size>>>(left.ptr, right.ptr, left.step, disp.ptr, disp.step, maxdisp);
cudaSafeCall( cudaThreadSynchronize() );
}
else
{
stereobm_gpu::stereoKernel<RADIUS><<<grid, threads, smem_size, stream>>>(left.ptr, right.ptr, left.step, disp.ptr, disp.step, maxdisp);
}
};
typedef void (*kernel_caller_t)(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int maxdisp);
typedef void (*kernel_caller_t)(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int maxdisp, const cudaStream_t & stream);
const static kernel_caller_t callers[] =
{
@ -341,7 +349,7 @@ namespace cv { namespace gpu { namespace impl
};
const int calles_num = sizeof(callers)/sizeof(callers[0]);
extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int maxdisp, int winsz, const DevMem2D_<unsigned int>& minSSD_buf)
extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int maxdisp, int winsz, const DevMem2D_<unsigned int>& minSSD_buf, const cudaStream_t & stream)
{
int winsz2 = winsz >> 1;
@ -361,7 +369,7 @@ namespace cv { namespace gpu { namespace impl
size_t minssd_step = minSSD_buf.step/minSSD_buf.elemSize();
cudaSafeCall( cudaMemcpyToSymbol( stereobm_gpu::cminSSD_step, &minssd_step, sizeof(minssd_step) ) );
callers[winsz2](left, right, disp, maxdisp);
callers[winsz2](left, right, disp, maxdisp, stream);
}
}}}

View File

@ -54,6 +54,7 @@
#include <limits>
#include "opencv2/gpu/gpu.hpp"
#include "opencv2/gpu/stream_accessor.hpp"
#if defined(HAVE_CUDA)

View File

@ -60,7 +60,8 @@ namespace cv { namespace gpu
{
namespace impl
{
extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int ndisp, int winsz, const DevMem2D_<uint>& minSSD_buf);
//extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int ndisp, int winsz, const DevMem2D_<uint>& minSSD_buf);
extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int ndisp, int winsz, const DevMem2D_<uint>& minSSD_buf, const cudaStream_t & stream);
extern "C" void prefilter_xsobel(const DevMem2D& input, const DevMem2D& output, int prefilterCap = 31);
extern "C" void postfilter_textureness(const DevMem2D& input, int winsz, float avergeTexThreshold, const DevMem2D& disp);
}
@ -97,7 +98,7 @@ bool cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable()
return false;
}
void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity)
void stereo_gpu_operator ( GpuMat& minSSD, GpuMat& leBuf, GpuMat& riBuf, int preset, int ndisp, int winSize, float avergeTexThreshold, const GpuMat& left, const GpuMat& right, GpuMat& disparity, const cudaStream_t & stream)
{
CV_DbgAssert(left.rows == right.rows && left.cols == right.cols);
CV_DbgAssert(left.type() == CV_8UC1);
@ -109,7 +110,7 @@ void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right
GpuMat le_for_bm = left;
GpuMat ri_for_bm = right;
if (preset == PREFILTER_XSOBEL)
if (preset == StereoBM_GPU::PREFILTER_XSOBEL)
{
leBuf.create( left.size(), left.type());
riBuf.create(right.size(), right.type());
@ -120,15 +121,22 @@ void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right
le_for_bm = leBuf;
ri_for_bm = riBuf;
}
impl::stereoBM_GPU(le_for_bm, ri_for_bm, disparity, ndisp, winSize, minSSD);
impl::stereoBM_GPU(le_for_bm, ri_for_bm, disparity, ndisp, winSize, minSSD, stream);
if (avergeTexThreshold)
impl::postfilter_textureness(le_for_bm, winSize, avergeTexThreshold, disparity);
}
// Synchronous overload: forwards the object's buffers (minSSD, leBuf, riBuf) and
// settings (preset, ndisp, winSize, avergeTexThreshold) together with the caller's
// images to stereo_gpu_operator, passing 0 as the stream argument.
void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity)
{
// 0 selects the default CUDA stream.
::stereo_gpu_operator(minSSD, leBuf, riBuf, preset, ndisp, winSize, avergeTexThreshold, left, right, disparity, 0);
}
// Stream overload: obtains the stream argument from StreamAccessor::getStream(stream)
// and forwards it, with the object's buffers and settings, to stereo_gpu_operator.
void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const CudaStream& stream)
{
// NOTE(review): this text comes from a commit diff; the CV_Assert below looks like the
// removed "not implemented" placeholder that the following call replaces — confirm
// against the committed file, since an always-false assert here would fire first.
CV_Assert(!"Not implemented");
::stereo_gpu_operator(minSSD, leBuf, riBuf, preset, ndisp, winSize, avergeTexThreshold, left, right, disparity, StreamAccessor::getStream(stream));
}
#endif /* !defined (HAVE_CUDA) */