Refactor the cudaoptflow public API:
* use opaque algorithm interfaces
* add stream support
This commit is contained in:
@@ -47,84 +47,148 @@ using namespace cv::cuda;
|
||||
|
||||
#if !defined (HAVE_CUDA) || !defined (HAVE_OPENCV_CUDALEGACY) || defined (CUDA_DISABLER)
|
||||
|
||||
// Stub compiled when CUDA / cudalegacy support is unavailable: every argument
// is ignored and the call is forwarded straight to throw_no_cuda().
void cv::cuda::BroxOpticalFlow::operator ()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, Stream&)
{
    throw_no_cuda();
}
|
||||
// Stub compiled when CUDA / cudalegacy support is unavailable: the parameters
// are ignored and throw_no_cuda() is invoked. The trailing return of an empty
// Ptr keeps the function well-formed for its declared return type.
Ptr<BroxOpticalFlow> cv::cuda::BroxOpticalFlow::create(double, double, double, int, int, int)
{
    throw_no_cuda();
    return Ptr<BroxOpticalFlow>();
}
|
||||
|
||||
#else
|
||||
|
||||
namespace
|
||||
{
|
||||
size_t getBufSize(const NCVBroxOpticalFlowDescriptor& desc, const NCVMatrix<Ncv32f>& frame0, const NCVMatrix<Ncv32f>& frame1,
|
||||
NCVMatrix<Ncv32f>& u, NCVMatrix<Ncv32f>& v, const cudaDeviceProp& devProp)
|
||||
namespace {
|
||||
|
||||
class BroxOpticalFlowImpl : public BroxOpticalFlow
|
||||
{
|
||||
NCVMemStackAllocator gpuCounter(static_cast<Ncv32u>(devProp.textureAlignment));
|
||||
public:
|
||||
BroxOpticalFlowImpl(double alpha, double gamma, double scale_factor,
|
||||
int inner_iterations, int outer_iterations, int solver_iterations) :
|
||||
alpha_(alpha), gamma_(gamma), scale_factor_(scale_factor),
|
||||
inner_iterations_(inner_iterations), outer_iterations_(outer_iterations),
|
||||
solver_iterations_(solver_iterations)
|
||||
{
|
||||
}
|
||||
|
||||
virtual void calc(InputArray I0, InputArray I1, InputOutputArray flow, Stream& stream);
|
||||
|
||||
virtual double getFlowSmoothness() const { return alpha_; }
|
||||
virtual void setFlowSmoothness(double alpha) { alpha_ = static_cast<float>(alpha); }
|
||||
|
||||
virtual double getGradientConstancyImportance() const { return gamma_; }
|
||||
virtual void setGradientConstancyImportance(double gamma) { gamma_ = static_cast<float>(gamma); }
|
||||
|
||||
virtual double getPyramidScaleFactor() const { return scale_factor_; }
|
||||
virtual void setPyramidScaleFactor(double scale_factor) { scale_factor_ = static_cast<float>(scale_factor); }
|
||||
|
||||
//! number of lagged non-linearity iterations (inner loop)
|
||||
virtual int getInnerIterations() const { return inner_iterations_; }
|
||||
virtual void setInnerIterations(int inner_iterations) { inner_iterations_ = inner_iterations; }
|
||||
|
||||
//! number of warping iterations (number of pyramid levels)
|
||||
virtual int getOuterIterations() const { return outer_iterations_; }
|
||||
virtual void setOuterIterations(int outer_iterations) { outer_iterations_ = outer_iterations; }
|
||||
|
||||
//! number of linear system solver iterations
|
||||
virtual int getSolverIterations() const { return solver_iterations_; }
|
||||
virtual void setSolverIterations(int solver_iterations) { solver_iterations_ = solver_iterations; }
|
||||
|
||||
private:
|
||||
//! flow smoothness
|
||||
float alpha_;
|
||||
|
||||
//! gradient constancy importance
|
||||
float gamma_;
|
||||
|
||||
//! pyramid scale factor
|
||||
float scale_factor_;
|
||||
|
||||
//! number of lagged non-linearity iterations (inner loop)
|
||||
int inner_iterations_;
|
||||
|
||||
//! number of warping iterations (number of pyramid levels)
|
||||
int outer_iterations_;
|
||||
|
||||
//! number of linear system solver iterations
|
||||
int solver_iterations_;
|
||||
};
|
||||
|
||||
static size_t getBufSize(const NCVBroxOpticalFlowDescriptor& desc,
|
||||
const NCVMatrix<Ncv32f>& frame0, const NCVMatrix<Ncv32f>& frame1,
|
||||
NCVMatrix<Ncv32f>& u, NCVMatrix<Ncv32f>& v,
|
||||
size_t textureAlignment)
|
||||
{
|
||||
NCVMemStackAllocator gpuCounter(static_cast<Ncv32u>(textureAlignment));
|
||||
|
||||
ncvSafeCall( NCVBroxOpticalFlow(desc, gpuCounter, frame0, frame1, u, v, 0) );
|
||||
|
||||
return gpuCounter.maxSize();
|
||||
}
|
||||
|
||||
static void outputHandler(const String &msg)
|
||||
{
|
||||
CV_Error(cv::Error::GpuApiCallError, msg.c_str());
|
||||
}
|
||||
|
||||
void BroxOpticalFlowImpl::calc(InputArray _I0, InputArray _I1, InputOutputArray _flow, Stream& stream)
|
||||
{
|
||||
const GpuMat frame0 = _I0.getGpuMat();
|
||||
const GpuMat frame1 = _I1.getGpuMat();
|
||||
|
||||
CV_Assert( frame0.type() == CV_32FC1 );
|
||||
CV_Assert( frame1.size() == frame0.size() && frame1.type() == frame0.type() );
|
||||
|
||||
ncvSetDebugOutputHandler(outputHandler);
|
||||
|
||||
BufferPool pool(stream);
|
||||
GpuMat u = pool.getBuffer(frame0.size(), CV_32FC1);
|
||||
GpuMat v = pool.getBuffer(frame0.size(), CV_32FC1);
|
||||
|
||||
NCVBroxOpticalFlowDescriptor desc;
|
||||
desc.alpha = alpha_;
|
||||
desc.gamma = gamma_;
|
||||
desc.scale_factor = scale_factor_;
|
||||
desc.number_of_inner_iterations = inner_iterations_;
|
||||
desc.number_of_outer_iterations = outer_iterations_;
|
||||
desc.number_of_solver_iterations = solver_iterations_;
|
||||
|
||||
NCVMemSegment frame0MemSeg;
|
||||
frame0MemSeg.begin.memtype = NCVMemoryTypeDevice;
|
||||
frame0MemSeg.begin.ptr = const_cast<uchar*>(frame0.data);
|
||||
frame0MemSeg.size = frame0.step * frame0.rows;
|
||||
|
||||
NCVMemSegment frame1MemSeg;
|
||||
frame1MemSeg.begin.memtype = NCVMemoryTypeDevice;
|
||||
frame1MemSeg.begin.ptr = const_cast<uchar*>(frame1.data);
|
||||
frame1MemSeg.size = frame1.step * frame1.rows;
|
||||
|
||||
NCVMemSegment uMemSeg;
|
||||
uMemSeg.begin.memtype = NCVMemoryTypeDevice;
|
||||
uMemSeg.begin.ptr = u.ptr();
|
||||
uMemSeg.size = u.step * u.rows;
|
||||
|
||||
NCVMemSegment vMemSeg;
|
||||
vMemSeg.begin.memtype = NCVMemoryTypeDevice;
|
||||
vMemSeg.begin.ptr = v.ptr();
|
||||
vMemSeg.size = v.step * v.rows;
|
||||
|
||||
DeviceInfo devInfo;
|
||||
size_t textureAlignment = devInfo.textureAlignment();
|
||||
|
||||
NCVMatrixReuse<Ncv32f> frame0Mat(frame0MemSeg, static_cast<Ncv32u>(textureAlignment), frame0.cols, frame0.rows, static_cast<Ncv32u>(frame0.step));
|
||||
NCVMatrixReuse<Ncv32f> frame1Mat(frame1MemSeg, static_cast<Ncv32u>(textureAlignment), frame1.cols, frame1.rows, static_cast<Ncv32u>(frame1.step));
|
||||
NCVMatrixReuse<Ncv32f> uMat(uMemSeg, static_cast<Ncv32u>(textureAlignment), u.cols, u.rows, static_cast<Ncv32u>(u.step));
|
||||
NCVMatrixReuse<Ncv32f> vMat(vMemSeg, static_cast<Ncv32u>(textureAlignment), v.cols, v.rows, static_cast<Ncv32u>(v.step));
|
||||
|
||||
size_t bufSize = getBufSize(desc, frame0Mat, frame1Mat, uMat, vMat, textureAlignment);
|
||||
GpuMat buf = pool.getBuffer(1, static_cast<int>(bufSize), CV_8UC1);
|
||||
|
||||
NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, bufSize, static_cast<Ncv32u>(textureAlignment), buf.ptr());
|
||||
|
||||
ncvSafeCall( NCVBroxOpticalFlow(desc, gpuAllocator, frame0Mat, frame1Mat, uMat, vMat, StreamAccessor::getStream(stream)) );
|
||||
|
||||
GpuMat flows[] = {u, v};
|
||||
cuda::merge(flows, 2, _flow, stream);
|
||||
}
|
||||
}
|
||||
|
||||
namespace
|
||||
Ptr<BroxOpticalFlow> cv::cuda::BroxOpticalFlow::create(double alpha, double gamma, double scale_factor, int inner_iterations, int outer_iterations, int solver_iterations)
|
||||
{
|
||||
static void outputHandler(const String &msg) { CV_Error(cv::Error::GpuApiCallError, msg.c_str()); }
|
||||
}
|
||||
|
||||
void cv::cuda::BroxOpticalFlow::operator ()(const GpuMat& frame0, const GpuMat& frame1, GpuMat& u, GpuMat& v, Stream& s)
|
||||
{
|
||||
ncvSetDebugOutputHandler(outputHandler);
|
||||
|
||||
CV_Assert(frame0.type() == CV_32FC1);
|
||||
CV_Assert(frame1.size() == frame0.size() && frame1.type() == frame0.type());
|
||||
|
||||
u.create(frame0.size(), CV_32FC1);
|
||||
v.create(frame0.size(), CV_32FC1);
|
||||
|
||||
cudaDeviceProp devProp;
|
||||
cudaSafeCall( cudaGetDeviceProperties(&devProp, getDevice()) );
|
||||
|
||||
NCVBroxOpticalFlowDescriptor desc;
|
||||
|
||||
desc.alpha = alpha;
|
||||
desc.gamma = gamma;
|
||||
desc.scale_factor = scale_factor;
|
||||
desc.number_of_inner_iterations = inner_iterations;
|
||||
desc.number_of_outer_iterations = outer_iterations;
|
||||
desc.number_of_solver_iterations = solver_iterations;
|
||||
|
||||
NCVMemSegment frame0MemSeg;
|
||||
frame0MemSeg.begin.memtype = NCVMemoryTypeDevice;
|
||||
frame0MemSeg.begin.ptr = const_cast<uchar*>(frame0.data);
|
||||
frame0MemSeg.size = frame0.step * frame0.rows;
|
||||
|
||||
NCVMemSegment frame1MemSeg;
|
||||
frame1MemSeg.begin.memtype = NCVMemoryTypeDevice;
|
||||
frame1MemSeg.begin.ptr = const_cast<uchar*>(frame1.data);
|
||||
frame1MemSeg.size = frame1.step * frame1.rows;
|
||||
|
||||
NCVMemSegment uMemSeg;
|
||||
uMemSeg.begin.memtype = NCVMemoryTypeDevice;
|
||||
uMemSeg.begin.ptr = u.ptr();
|
||||
uMemSeg.size = u.step * u.rows;
|
||||
|
||||
NCVMemSegment vMemSeg;
|
||||
vMemSeg.begin.memtype = NCVMemoryTypeDevice;
|
||||
vMemSeg.begin.ptr = v.ptr();
|
||||
vMemSeg.size = v.step * v.rows;
|
||||
|
||||
NCVMatrixReuse<Ncv32f> frame0Mat(frame0MemSeg, static_cast<Ncv32u>(devProp.textureAlignment), frame0.cols, frame0.rows, static_cast<Ncv32u>(frame0.step));
|
||||
NCVMatrixReuse<Ncv32f> frame1Mat(frame1MemSeg, static_cast<Ncv32u>(devProp.textureAlignment), frame1.cols, frame1.rows, static_cast<Ncv32u>(frame1.step));
|
||||
NCVMatrixReuse<Ncv32f> uMat(uMemSeg, static_cast<Ncv32u>(devProp.textureAlignment), u.cols, u.rows, static_cast<Ncv32u>(u.step));
|
||||
NCVMatrixReuse<Ncv32f> vMat(vMemSeg, static_cast<Ncv32u>(devProp.textureAlignment), v.cols, v.rows, static_cast<Ncv32u>(v.step));
|
||||
|
||||
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||
|
||||
size_t bufSize = getBufSize(desc, frame0Mat, frame1Mat, uMat, vMat, devProp);
|
||||
|
||||
ensureSizeIsEnough(1, static_cast<int>(bufSize), CV_8UC1, buf);
|
||||
|
||||
NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, bufSize, static_cast<Ncv32u>(devProp.textureAlignment), buf.ptr());
|
||||
|
||||
ncvSafeCall( NCVBroxOpticalFlow(desc, gpuAllocator, frame0Mat, frame1Mat, uMat, vMat, stream) );
|
||||
return makePtr<BroxOpticalFlowImpl>(alpha, gamma, scale_factor, inner_iterations, outer_iterations, solver_iterations);
|
||||
}
|
||||
|
||||
#endif /* HAVE_CUDA */
|
||||
|
Reference in New Issue
Block a user