Added a GPU version of Farneback's optical flow

This commit is contained in:
Alexey Spizhevoy 2012-02-16 11:23:51 +00:00
parent 59ff1a4ccb
commit 5c459aa815
9 changed files with 196 additions and 13 deletions

View File

@ -214,6 +214,8 @@ namespace cv { namespace gpu
CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, GpuMat& m);
CV_EXPORTS void ensureSizeIsEnough(Size size, int type, GpuMat& m);
CV_EXPORTS GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat &mat);
////////////////////////////////////////////////////////////////////////
// Error handling
@ -459,6 +461,13 @@ namespace cv { namespace gpu
else
m.create(rows, cols, type);
}
// Hands back a rows x cols matrix of the requested type, using `mat` as a
// reusable buffer. If `mat` is non-empty, already has that type and is at
// least rows x cols, the result is the sub-matrix mat(Rect(0, 0, cols, rows))
// and `mat` itself is not modified. Otherwise `mat` is overwritten with a
// newly constructed GpuMat(rows, cols, type), which is also the return value.
inline GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat &mat)
{
    const bool bufferFits = !mat.empty() && mat.type() == type && mat.rows >= rows && mat.cols >= cols;
    if (!bufferFits)
    {
        mat = GpuMat(rows, cols, type);
        return mat;
    }
    return mat(Rect(0, 0, cols, rows));
}
}}
#endif // __cplusplus

View File

@ -1819,6 +1819,70 @@ private:
vector<GpuMat> vPyr_;
};
// Functor wrapping the GPU implementation of Farneback's optical flow.
// The public data members are tuning parameters; callers adjust them after
// construction and then invoke operator().
// NOTE(review): the parameter meanings presumably mirror the same-named
// arguments of cv::calcOpticalFlowFarneback — confirm against the implementation.
class CV_EXPORTS FarnebackOpticalFlow
{
public:
    // Initialises every tuning parameter to its default value.
    FarnebackOpticalFlow()
        : numLevels(5),
          pyrScale(0.5),
          fastPyramids(false),
          winSize(13),
          numIters(10),
          polyN(5),
          polySigma(1.1),
          flags(0)
    {
    }

    int numLevels;
    double pyrScale;
    bool fastPyramids;
    int winSize;
    int numIters;
    int polyN;
    double polySigma;
    int flags;

    // Runs the algorithm on the frame pair and writes the result into flowx
    // and flowy; `s` defaults to Stream::Null(). Defined out of line.
    void operator ()(const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &s = Stream::Null());

    // Releases every internal GpuMat buffer and empties both pyramid vectors.
    void releaseMemory()
    {
        for (int i = 0; i < 2; ++i)
        {
            frames_[i].release();
            I_[i].release();
            R_[i].release();
            tmp_[i].release();
        }
        M_.release();
        bufM_.release();
        pyramid0_.clear();
        pyramid1_.clear();
    }

private:
    // Internal helpers; their definitions are not part of this header.
    void prepareGaussian(
            int n, double sigma, float *g, float *xg, float *xxg,
            double &ig11, double &ig03, double &ig33, double &ig55);

    void setPolynomialExpansionConsts(int n, double sigma);

    void updateFlow_boxFilter(
            const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat &flowy,
            GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]);

    void updateFlow_gaussianBlur(
            const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat& flowy,
            GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]);

    // Buffers kept between calls; all of them are freed by releaseMemory().
    GpuMat frames_[2];
    GpuMat I_[2];
    GpuMat M_;
    GpuMat bufM_;
    GpuMat R_[2];
    GpuMat tmp_[2];
    std::vector<GpuMat> pyramid0_;
    std::vector<GpuMat> pyramid1_;
};
//! Interpolate frames (images) using provided optical flow (displacement field).
//! frame0 - frame 0 (32-bit floating point images, single channel)
//! frame1 - frame 1 (the same type and size)

View File

@ -81,6 +81,7 @@ namespace cv { namespace gpu
// Private state behind a Stream: a CUDA stream handle and an integer ref_counter.
struct Stream::Impl
{
    // Null-tolerant accessor: yields stream 0 when no Impl is attached,
    // otherwise the stored handle.
    static cudaStream_t getStream(const Impl* impl)
    {
        if (!impl)
            return 0;
        return impl->stream;
    }

    cudaStream_t stream;
    int ref_counter;
};
@ -95,7 +96,10 @@ namespace
};
}
CV_EXPORTS cudaStream_t cv::gpu::StreamAccessor::getStream(const Stream& stream) { return stream.impl ? stream.impl->stream : 0; };
// Returns the raw CUDA stream handle behind `stream`. Delegates to
// Stream::Impl::getStream, which maps a null implementation pointer to
// stream 0 rather than dereferencing it.
CV_EXPORTS cudaStream_t cv::gpu::StreamAccessor::getStream(const Stream& stream)
{
    return Stream::Impl::getStream(stream.impl);
}
void cv::gpu::Stream::create()
{
@ -143,7 +147,7 @@ Stream& cv::gpu::Stream::operator=(const Stream& stream)
bool cv::gpu::Stream::queryIfComplete()
{
cudaError_t err = cudaStreamQuery( impl->stream );
cudaError_t err = cudaStreamQuery( Impl::getStream(impl) );
if (err == cudaErrorNotReady || err == cudaSuccess)
return err == cudaSuccess;
@ -152,19 +156,19 @@ bool cv::gpu::Stream::queryIfComplete()
return false;
}
void cv::gpu::Stream::waitForCompletion() { cudaSafeCall( cudaStreamSynchronize( impl->stream ) ); }
// Calls cudaStreamSynchronize on the handle resolved by Impl::getStream(impl)
// (stream 0 when impl is null) and routes the result through cudaSafeCall.
void cv::gpu::Stream::waitForCompletion()
{
    cudaSafeCall( cudaStreamSynchronize( Impl::getStream(impl) ) );
}
void cv::gpu::Stream::enqueueDownload(const GpuMat& src, Mat& dst)
{
// if not -> allocation will be done, but after that dst will not point to page locked memory
CV_Assert(src.cols == dst.cols && src.rows == dst.rows && src.type() == dst.type() );
devcopy(src, dst, impl->stream, cudaMemcpyDeviceToHost);
devcopy(src, dst, Impl::getStream(impl), cudaMemcpyDeviceToHost);
}
void cv::gpu::Stream::enqueueDownload(const GpuMat& src, CudaMem& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToHost); }
// Forwards to devcopy with cudaMemcpyDeviceToHost, using the handle resolved
// by Impl::getStream(impl) (stream 0 when impl is null).
void cv::gpu::Stream::enqueueDownload(const GpuMat& src, CudaMem& dst)
{
    devcopy(src, dst, Impl::getStream(impl), cudaMemcpyDeviceToHost);
}
void cv::gpu::Stream::enqueueUpload(const CudaMem& src, GpuMat& dst){ devcopy(src, dst, impl->stream, cudaMemcpyHostToDevice); }
void cv::gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst) { devcopy(src, dst, impl->stream, cudaMemcpyHostToDevice); }
void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToDevice); }
// Forwards to devcopy with cudaMemcpyHostToDevice (CudaMem source), using the
// handle resolved by Impl::getStream(impl) (stream 0 when impl is null).
void cv::gpu::Stream::enqueueUpload(const CudaMem& src, GpuMat& dst)
{
    devcopy(src, dst, Impl::getStream(impl), cudaMemcpyHostToDevice);
}
// Forwards to devcopy with cudaMemcpyHostToDevice (Mat source), using the
// handle resolved by Impl::getStream(impl) (stream 0 when impl is null).
void cv::gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst)
{
    devcopy(src, dst, Impl::getStream(impl), cudaMemcpyHostToDevice);
}
// Forwards to devcopy with cudaMemcpyDeviceToDevice, using the handle
// resolved by Impl::getStream(impl) (stream 0 when impl is null).
void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst)
{
    devcopy(src, dst, Impl::getStream(impl), cudaMemcpyDeviceToDevice);
}
void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar s)
{
@ -173,7 +177,7 @@ void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar s)
if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0)
{
cudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, impl->stream) );
cudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, Impl::getStream(impl)) );
return;
}
if (src.depth() == CV_8U)
@ -183,12 +187,12 @@ void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar s)
if (cn == 1 || (cn == 2 && s[0] == s[1]) || (cn == 3 && s[0] == s[1] && s[0] == s[2]) || (cn == 4 && s[0] == s[1] && s[0] == s[2] && s[0] == s[3]))
{
int val = saturate_cast<uchar>(s[0]);
cudaSafeCall( cudaMemset2DAsync(src.data, src.step, val, src.cols * src.elemSize(), src.rows, impl->stream) );
cudaSafeCall( cudaMemset2DAsync(src.data, src.step, val, src.cols * src.elemSize(), src.rows, Impl::getStream(impl)) );
return;
}
}
setTo(src, s, impl->stream);
setTo(src, s, Impl::getStream(impl));
}
void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask)
@ -198,7 +202,7 @@ void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask)
CV_Assert(mask.type() == CV_8UC1);
setTo(src, val, mask, impl->stream);
setTo(src, val, mask, Impl::getStream(impl));
}
void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype, double alpha, double beta)
@ -226,7 +230,7 @@ void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype,
psrc = &(temp = src);
dst.create( src.size(), rtype );
convertTo(src, dst, alpha, beta, impl->stream);
convertTo(src, dst, alpha, beta, Impl::getStream(impl));
}
cv::gpu::Stream::operator bool() const

View File

@ -65,6 +65,7 @@
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/calib3d/calib3d.hpp"
#include "opencv2/core/internal.hpp"
#include "opencv2/video/video.hpp"
#define OPENCV_GPU_UNUSED(x) (void)x

View File

@ -44,6 +44,7 @@
#include <cmath>
#include <cstdio>
#include <iostream>
#include <fstream>
#include <sstream>
#include <limits>

View File

@ -423,3 +423,74 @@ TEST_P(PyrLKOpticalFlowSparse, Accuracy)
INSTANTIATE_TEST_CASE_P(Video, PyrLKOpticalFlowSparse, Combine(ALL_DEVICES, Bool()));
#endif // HAVE_CUDA
// Fixture for the GPU Farneback accuracy test. Test parameters, in order:
// device, pyrScale, polyN, flags, useInitFlow.
PARAM_TEST_CASE(FarnebackOpticalFlowTest, cv::gpu::DeviceInfo, double, int, int, bool)
{
    Mat frame0, frame1;

    double pyrScale;
    int polyN;
    double polySigma;
    int flags;
    bool useInitFlow;

    // Loads the two input frames as grayscale, selects the device under test
    // and unpacks the remaining test parameters.
    virtual void SetUp()
    {
        frame0 = readImage("opticalflow/rubberwhale1.png", cv::IMREAD_GRAYSCALE);
        frame1 = readImage("opticalflow/rubberwhale2.png", cv::IMREAD_GRAYSCALE);
        ASSERT_FALSE(frame0.empty());
        ASSERT_FALSE(frame1.empty());

        cv::gpu::setDevice(GET_PARAM(0).deviceID());

        pyrScale = GET_PARAM(1);
        polyN = GET_PARAM(2);
        // polySigma is not a test parameter; it is derived from polyN.
        if (polyN <= 5)
            polySigma = 1.1;
        else
            polySigma = 1.5;
        flags = GET_PARAM(3);
        useInitFlow = GET_PARAM(4);
    }
};
// Compares the GPU Farneback flow with cv::calcOpticalFlowFarneback run with
// the same parameters; each flow component must score below 0.1 in
// checkSimilarity.
TEST_P(FarnebackOpticalFlowTest, Accuracy)
{
    using namespace cv;

    gpu::FarnebackOpticalFlow calc;
    calc.pyrScale = pyrScale;
    calc.polyN = polyN;
    calc.polySigma = polySigma;
    calc.flags = flags;

    gpu::GpuMat d_flowx, d_flowy;
    calc(gpu::GpuMat(frame0), gpu::GpuMat(frame1), d_flowx, d_flowy);

    Mat flow;
    if (useInitFlow)
    {
        // Seed the CPU run with the flow from the first GPU pass, then rerun
        // the GPU version with OPTFLOW_USE_INITIAL_FLOW set on the same
        // d_flowx / d_flowy.
        Mat gpuFlowChannels[] = {(Mat)d_flowx, (Mat)d_flowy};
        merge(gpuFlowChannels, 2, flow);

        calc.flags |= OPTFLOW_USE_INITIAL_FLOW;
        calc(gpu::GpuMat(frame0), gpu::GpuMat(frame1), d_flowx, d_flowy);
    }

    calcOpticalFlowFarneback(
        frame0, frame1, flow, calc.pyrScale, calc.numLevels, calc.winSize,
        calc.numIters, calc.polyN, calc.polySigma, calc.flags);

    std::vector<Mat> cpuFlowChannels;
    split(flow, cpuFlowChannels);

    // Debug aid, left disabled:
    /*std::cout << checkSimilarity(cpuFlowChannels[0], (Mat)d_flowx) << " "
                << checkSimilarity(cpuFlowChannels[1], (Mat)d_flowy) << std::endl;*/

    EXPECT_LT(checkSimilarity(cpuFlowChannels[0], (Mat)d_flowx), 0.1);
    EXPECT_LT(checkSimilarity(cpuFlowChannels[1], (Mat)d_flowy), 0.1);
}
// Instantiates FarnebackOpticalFlowTest over every combination of:
// device (ALL_DEVICES), pyrScale {0.3, 0.5, 0.8}, polyN {5, 7},
// flags {0, OPTFLOW_FARNEBACK_GAUSSIAN} and useInitFlow {false, true}.
INSTANTIATE_TEST_CASE_P(Video, FarnebackOpticalFlowTest,
Combine(ALL_DEVICES,
Values(0.3, 0.5, 0.8),
Values(5, 7),
Values(0, (int)cv::OPTFLOW_FARNEBACK_GAUSSIAN),
Values(false, true)));

BIN
samples/gpu/basketball1.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 130 KiB

BIN
samples/gpu/basketball2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 130 KiB

View File

@ -1183,3 +1183,36 @@ TEST(PyrLKOpticalFlow)
GPU_OFF;
}
}
// Performance test: runs the GPU and CPU Farneback implementations on each
// dataset for every combination of fastPyramids and useGaussianBlur.
TEST(FarnebackOpticalFlow)
{
    const string names[] = {"rubberwhale", "basketball"};
    const size_t numDatasets = sizeof(names) / sizeof(names[0]);

    for (size_t i = 0; i < numDatasets; ++i)
    {
        for (int fastPyramids = 0; fastPyramids < 2; ++fastPyramids)
        {
            for (int useGaussianBlur = 0; useGaussianBlur < 2; ++useGaussianBlur)
            {
                SUBTEST << "dataset=" << names[i] << ", fastPyramids=" << fastPyramids << ", useGaussianBlur=" << useGaussianBlur;

                Mat frame0 = imread(abspath(names[i] + "1.png"), IMREAD_GRAYSCALE);
                Mat frame1 = imread(abspath(names[i] + "2.png"), IMREAD_GRAYSCALE);
                if (frame0.empty())
                    throw runtime_error("can't open " + names[i] + "1.png");
                if (frame1.empty())
                    throw runtime_error("can't open " + names[i] + "2.png");

                gpu::FarnebackOpticalFlow calc;
                calc.fastPyramids = (fastPyramids != 0);
                if (useGaussianBlur)
                    calc.flags |= OPTFLOW_FARNEBACK_GAUSSIAN;

                gpu::GpuMat d_frame0(frame0);
                gpu::GpuMat d_frame1(frame1);
                gpu::GpuMat d_flowx, d_flowy;

                // One call outside the GPU_ON/GPU_OFF pair, then one inside it.
                // NOTE(review): presumably a warm-up so one-time setup is not measured — confirm.
                calc(d_frame0, d_frame1, d_flowx, d_flowy);

                GPU_ON;
                calc(d_frame0, d_frame1, d_flowx, d_flowy);
                GPU_OFF;

                // Same pattern for the CPU reference: one call outside CPU_ON/CPU_OFF, one inside.
                Mat flow;
                calcOpticalFlowFarneback(frame0, frame1, flow, calc.pyrScale, calc.numLevels, calc.winSize, calc.numIters, calc.polyN, calc.polySigma, calc.flags);

                CPU_ON;
                calcOpticalFlowFarneback(frame0, frame1, flow, calc.pyrScale, calc.numLevels, calc.winSize, calc.numIters, calc.polyN, calc.polySigma, calc.flags);
                CPU_OFF;
            }
        }
    }
}