Added the GPU version of the Farneback's optical flow

This commit is contained in:
Alexey Spizhevoy
2012-02-16 11:23:51 +00:00
parent 59ff1a4ccb
commit 5c459aa815
9 changed files with 196 additions and 13 deletions

View File

@@ -81,6 +81,7 @@ namespace cv { namespace gpu
struct Stream::Impl
{
static cudaStream_t getStream(const Impl* impl) { return impl ? impl->stream : 0; }
cudaStream_t stream;
int ref_counter;
};
@@ -95,7 +96,10 @@ namespace
};
}
CV_EXPORTS cudaStream_t cv::gpu::StreamAccessor::getStream(const Stream& stream) { return stream.impl ? stream.impl->stream : 0; };
CV_EXPORTS cudaStream_t cv::gpu::StreamAccessor::getStream(const Stream& stream)
{
return Stream::Impl::getStream(stream.impl);
};
void cv::gpu::Stream::create()
{
@@ -143,7 +147,7 @@ Stream& cv::gpu::Stream::operator=(const Stream& stream)
bool cv::gpu::Stream::queryIfComplete()
{
cudaError_t err = cudaStreamQuery( impl->stream );
cudaError_t err = cudaStreamQuery( Impl::getStream(impl) );
if (err == cudaErrorNotReady || err == cudaSuccess)
return err == cudaSuccess;
@@ -152,19 +156,19 @@ bool cv::gpu::Stream::queryIfComplete()
return false;
}
void cv::gpu::Stream::waitForCompletion() { cudaSafeCall( cudaStreamSynchronize( impl->stream ) ); }
void cv::gpu::Stream::waitForCompletion() { cudaSafeCall( cudaStreamSynchronize( Impl::getStream(impl) ) ); }
void cv::gpu::Stream::enqueueDownload(const GpuMat& src, Mat& dst)
{
// if not -> allocation will be done, but after that dst will not point to page locked memory
CV_Assert(src.cols == dst.cols && src.rows == dst.rows && src.type() == dst.type() );
devcopy(src, dst, impl->stream, cudaMemcpyDeviceToHost);
devcopy(src, dst, Impl::getStream(impl), cudaMemcpyDeviceToHost);
}
void cv::gpu::Stream::enqueueDownload(const GpuMat& src, CudaMem& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToHost); }
void cv::gpu::Stream::enqueueDownload(const GpuMat& src, CudaMem& dst) { devcopy(src, dst, Impl::getStream(impl), cudaMemcpyDeviceToHost); }
void cv::gpu::Stream::enqueueUpload(const CudaMem& src, GpuMat& dst){ devcopy(src, dst, impl->stream, cudaMemcpyHostToDevice); }
void cv::gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst) { devcopy(src, dst, impl->stream, cudaMemcpyHostToDevice); }
void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToDevice); }
void cv::gpu::Stream::enqueueUpload(const CudaMem& src, GpuMat& dst){ devcopy(src, dst, Impl::getStream(impl), cudaMemcpyHostToDevice); }
void cv::gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst) { devcopy(src, dst, Impl::getStream(impl), cudaMemcpyHostToDevice); }
void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst) { devcopy(src, dst, Impl::getStream(impl), cudaMemcpyDeviceToDevice); }
void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar s)
{
@@ -173,7 +177,7 @@ void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar s)
if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0)
{
cudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, impl->stream) );
cudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, Impl::getStream(impl)) );
return;
}
if (src.depth() == CV_8U)
@@ -183,12 +187,12 @@ void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar s)
if (cn == 1 || (cn == 2 && s[0] == s[1]) || (cn == 3 && s[0] == s[1] && s[0] == s[2]) || (cn == 4 && s[0] == s[1] && s[0] == s[2] && s[0] == s[3]))
{
int val = saturate_cast<uchar>(s[0]);
cudaSafeCall( cudaMemset2DAsync(src.data, src.step, val, src.cols * src.elemSize(), src.rows, impl->stream) );
cudaSafeCall( cudaMemset2DAsync(src.data, src.step, val, src.cols * src.elemSize(), src.rows, Impl::getStream(impl)) );
return;
}
}
setTo(src, s, impl->stream);
setTo(src, s, Impl::getStream(impl));
}
void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask)
@@ -198,7 +202,7 @@ void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask)
CV_Assert(mask.type() == CV_8UC1);
setTo(src, val, mask, impl->stream);
setTo(src, val, mask, Impl::getStream(impl));
}
void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype, double alpha, double beta)
@@ -226,7 +230,7 @@ void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype,
psrc = &(temp = src);
dst.create( src.size(), rtype );
convertTo(src, dst, alpha, beta, impl->stream);
convertTo(src, dst, alpha, beta, Impl::getStream(impl));
}
cv::gpu::Stream::operator bool() const

View File

@@ -65,6 +65,7 @@
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/calib3d/calib3d.hpp"
#include "opencv2/core/internal.hpp"
#include "opencv2/video/video.hpp"
#define OPENCV_GPU_UNUSED(x) (void)x