Implemented asynchronous (CUDA stream) call for StereoBM_GPU::operator()
This commit is contained in:
parent
dc0f313924
commit
12dc52c2e7
@ -311,7 +311,7 @@ __global__ void stereoKernel(unsigned char *left, unsigned char *right, size_t i
|
||||
|
||||
namespace cv { namespace gpu { namespace impl
|
||||
{
|
||||
template<int RADIUS> void kernel_caller(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int maxdisp)
|
||||
template<int RADIUS> void kernel_caller(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int maxdisp, const cudaStream_t & stream)
|
||||
{
|
||||
dim3 grid(1,1,1);
|
||||
dim3 threads(BLOCK_W, 1, 1);
|
||||
@ -322,11 +322,19 @@ namespace cv { namespace gpu { namespace impl
|
||||
//See above: #define COL_SSD_SIZE (BLOCK_W + 2 * RADIUS)
|
||||
size_t smem_size = (BLOCK_W + N_DISPARITIES * (BLOCK_W + 2 * RADIUS)) * sizeof(unsigned int);
|
||||
|
||||
if (stream == 0)
|
||||
{
|
||||
stereobm_gpu::stereoKernel<RADIUS><<<grid, threads, smem_size>>>(left.ptr, right.ptr, left.step, disp.ptr, disp.step, maxdisp);
|
||||
cudaSafeCall( cudaThreadSynchronize() );
|
||||
}
|
||||
else
|
||||
{
|
||||
stereobm_gpu::stereoKernel<RADIUS><<<grid, threads, smem_size, stream>>>(left.ptr, right.ptr, left.step, disp.ptr, disp.step, maxdisp);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
typedef void (*kernel_caller_t)(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int maxdisp);
|
||||
typedef void (*kernel_caller_t)(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int maxdisp, const cudaStream_t & stream);
|
||||
|
||||
const static kernel_caller_t callers[] =
|
||||
{
|
||||
@ -341,7 +349,7 @@ namespace cv { namespace gpu { namespace impl
|
||||
};
|
||||
const int calles_num = sizeof(callers)/sizeof(callers[0]);
|
||||
|
||||
extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int maxdisp, int winsz, const DevMem2D_<unsigned int>& minSSD_buf)
|
||||
extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int maxdisp, int winsz, const DevMem2D_<unsigned int>& minSSD_buf, const cudaStream_t & stream)
|
||||
{
|
||||
int winsz2 = winsz >> 1;
|
||||
|
||||
@ -361,7 +369,7 @@ namespace cv { namespace gpu { namespace impl
|
||||
size_t minssd_step = minSSD_buf.step/minSSD_buf.elemSize();
|
||||
cudaSafeCall( cudaMemcpyToSymbol( stereobm_gpu::cminSSD_step, &minssd_step, sizeof(minssd_step) ) );
|
||||
|
||||
callers[winsz2](left, right, disp, maxdisp);
|
||||
callers[winsz2](left, right, disp, maxdisp, stream);
|
||||
}
|
||||
}}}
|
||||
|
||||
|
@ -54,6 +54,7 @@
|
||||
#include <limits>
|
||||
|
||||
#include "opencv2/gpu/gpu.hpp"
|
||||
#include "opencv2/gpu/stream_accessor.hpp"
|
||||
|
||||
|
||||
#if defined(HAVE_CUDA)
|
||||
|
@ -60,7 +60,8 @@ namespace cv { namespace gpu
|
||||
{
|
||||
namespace impl
|
||||
{
|
||||
extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int ndisp, int winsz, const DevMem2D_<uint>& minSSD_buf);
|
||||
//extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int ndisp, int winsz, const DevMem2D_<uint>& minSSD_buf);
|
||||
extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int ndisp, int winsz, const DevMem2D_<uint>& minSSD_buf, const cudaStream_t & stream);
|
||||
extern "C" void prefilter_xsobel(const DevMem2D& input, const DevMem2D& output, int prefilterCap = 31);
|
||||
extern "C" void postfilter_textureness(const DevMem2D& input, int winsz, float avergeTexThreshold, const DevMem2D& disp);
|
||||
}
|
||||
@ -97,7 +98,7 @@ bool cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable()
|
||||
return false;
|
||||
}
|
||||
|
||||
void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity)
|
||||
void stereo_gpu_operator ( GpuMat& minSSD, GpuMat& leBuf, GpuMat& riBuf, int preset, int ndisp, int winSize, float avergeTexThreshold, const GpuMat& left, const GpuMat& right, GpuMat& disparity, const cudaStream_t & stream)
|
||||
{
|
||||
CV_DbgAssert(left.rows == right.rows && left.cols == right.cols);
|
||||
CV_DbgAssert(left.type() == CV_8UC1);
|
||||
@ -109,7 +110,7 @@ void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right
|
||||
GpuMat le_for_bm = left;
|
||||
GpuMat ri_for_bm = right;
|
||||
|
||||
if (preset == PREFILTER_XSOBEL)
|
||||
if (preset == StereoBM_GPU::PREFILTER_XSOBEL)
|
||||
{
|
||||
leBuf.create( left.size(), left.type());
|
||||
riBuf.create(right.size(), right.type());
|
||||
@ -120,15 +121,22 @@ void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right
|
||||
le_for_bm = leBuf;
|
||||
ri_for_bm = riBuf;
|
||||
}
|
||||
impl::stereoBM_GPU(le_for_bm, ri_for_bm, disparity, ndisp, winSize, minSSD);
|
||||
|
||||
impl::stereoBM_GPU(le_for_bm, ri_for_bm, disparity, ndisp, winSize, minSSD, stream);
|
||||
|
||||
if (avergeTexThreshold)
|
||||
impl::postfilter_textureness(le_for_bm, winSize, avergeTexThreshold, disparity);
|
||||
}
|
||||
|
||||
|
||||
// Blocking overload: forwards minSSD, leBuf, riBuf, preset, ndisp, winSize and
// avergeTexThreshold together with the caller's images to stereo_gpu_operator,
// passing 0 as the stream. For a zero stream, kernel_caller launches
// stereoKernel without a stream argument and then calls cudaThreadSynchronize().
void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity)
|
||||
{
|
||||
::stereo_gpu_operator(minSSD, leBuf, riBuf, preset, ndisp, winSize, avergeTexThreshold, left, right, disparity, 0);
|
||||
}
|
||||
|
||||
// Stream overload: forwards the same arguments to stereo_gpu_operator, but
// passes the stream obtained from StreamAccessor::getStream(stream) instead of 0.
// For a non-zero stream, kernel_caller launches stereoKernel on that stream and
// does not synchronize afterwards.
void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const CudaStream& stream)
|
||||
{
|
||||
// NOTE(review): this listing is a rendered diff without +/- markers. The
// CV_Assert below looks like the removed pre-commit body and the
// stereo_gpu_operator call like its replacement -- confirm against the
// checked-in file before treating both statements as live code.
CV_Assert(!"Not implemented");
|
||||
::stereo_gpu_operator(minSSD, leBuf, riBuf, preset, ndisp, winSize, avergeTexThreshold, left, right, disparity, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
#endif /* !defined (HAVE_CUDA) */
|
Loading…
x
Reference in New Issue
Block a user