split gpuvideo onto gpuoptflow and gpubgsegm
This commit is contained in:
9
modules/gpuoptflow/CMakeLists.txt
Normal file
9
modules/gpuoptflow/CMakeLists.txt
Normal file
@@ -0,0 +1,9 @@
|
||||
# The module is switched off on Android and iOS builds.
if(ANDROID OR IOS)
  ocv_module_disable(gpuoptflow)
endif()

set(the_description "GPU-accelerated Optical Flow")

# Turn off -Wundef and -Wmissing-declarations in CMAKE_CXX_FLAGS for this module.
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations)

# Dependencies of the module; opencv_gpulegacy is listed after OPTIONAL.
ocv_define_module(gpuoptflow
  opencv_video
  opencv_legacy
  opencv_gpuarithm
  opencv_gpuwarping
  opencv_gpuimgproc
  OPTIONAL
  opencv_gpulegacy)
|
8
modules/gpuoptflow/doc/gpuoptflow.rst
Normal file
8
modules/gpuoptflow/doc/gpuoptflow.rst
Normal file
@@ -0,0 +1,8 @@
|
||||
****************************************
|
||||
gpuoptflow. GPU-accelerated Optical Flow
|
||||
****************************************
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
optflow
|
238
modules/gpuoptflow/doc/optflow.rst
Normal file
238
modules/gpuoptflow/doc/optflow.rst
Normal file
@@ -0,0 +1,238 @@
|
||||
Optical Flow
|
||||
==============
|
||||
|
||||
.. highlight:: cpp
|
||||
|
||||
|
||||
|
||||
gpu::BroxOpticalFlow
|
||||
--------------------
|
||||
.. ocv:class:: gpu::BroxOpticalFlow
|
||||
|
||||
Class computing the optical flow for two images using the optical flow algorithm of Brox et al. ([Brox2004]_). ::
|
||||
|
||||
class BroxOpticalFlow
|
||||
{
|
||||
public:
|
||||
BroxOpticalFlow(float alpha_, float gamma_, float scale_factor_, int inner_iterations_, int outer_iterations_, int solver_iterations_);
|
||||
|
||||
//! Compute optical flow
|
||||
//! frame0 - source frame (supports only CV_32FC1 type)
|
||||
//! frame1 - frame to track (with the same size and type as frame0)
|
||||
//! u - flow horizontal component (along x axis)
|
||||
//! v - flow vertical component (along y axis)
|
||||
void operator ()(const GpuMat& frame0, const GpuMat& frame1, GpuMat& u, GpuMat& v, Stream& stream = Stream::Null());
|
||||
|
||||
//! flow smoothness
|
||||
float alpha;
|
||||
|
||||
//! gradient constancy importance
|
||||
float gamma;
|
||||
|
||||
//! pyramid scale factor
|
||||
float scale_factor;
|
||||
|
||||
//! number of lagged non-linearity iterations (inner loop)
|
||||
int inner_iterations;
|
||||
|
||||
//! number of warping iterations (number of pyramid levels)
|
||||
int outer_iterations;
|
||||
|
||||
//! number of linear system solver iterations
|
||||
int solver_iterations;
|
||||
|
||||
GpuMat buf;
|
||||
};
|
||||
|
||||
|
||||
|
||||
gpu::GoodFeaturesToTrackDetector_GPU::GoodFeaturesToTrackDetector_GPU
|
||||
---------------------------------------------------------------------
|
||||
Constructor.
|
||||
|
||||
.. ocv:function:: gpu::GoodFeaturesToTrackDetector_GPU::GoodFeaturesToTrackDetector_GPU(int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0, int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04)
|
||||
|
||||
:param maxCorners: Maximum number of corners to return. If more corners are found than ``maxCorners``, only the strongest of them are returned.
|
||||
|
||||
:param qualityLevel: Parameter characterizing the minimal accepted quality of image corners. The parameter value is multiplied by the best corner quality measure, which is the minimal eigenvalue (see :ocv:func:`gpu::cornerMinEigenVal` ) or the Harris function response (see :ocv:func:`gpu::cornerHarris` ). The corners with the quality measure less than the product are rejected. For example, if the best corner has the quality measure = 1500, and the ``qualityLevel=0.01`` , then all the corners with the quality measure less than 15 are rejected.
|
||||
|
||||
:param minDistance: Minimum possible Euclidean distance between the returned corners.
|
||||
|
||||
:param blockSize: Size of an average block for computing a derivative covariation matrix over each pixel neighborhood. See :ocv:func:`cornerEigenValsAndVecs` .
|
||||
|
||||
:param useHarrisDetector: Parameter indicating whether to use a Harris detector (see :ocv:func:`gpu::cornerHarris`) or :ocv:func:`gpu::cornerMinEigenVal`.
|
||||
|
||||
:param harrisK: Free parameter of the Harris detector.
|
||||
|
||||
|
||||
gpu::FarnebackOpticalFlow
|
||||
-------------------------
|
||||
.. ocv:class:: gpu::FarnebackOpticalFlow
|
||||
|
||||
Class computing a dense optical flow using Gunnar Farneback’s algorithm. ::
|
||||
|
||||
class CV_EXPORTS FarnebackOpticalFlow
|
||||
{
|
||||
public:
|
||||
FarnebackOpticalFlow()
|
||||
{
|
||||
numLevels = 5;
|
||||
pyrScale = 0.5;
|
||||
fastPyramids = false;
|
||||
winSize = 13;
|
||||
numIters = 10;
|
||||
polyN = 5;
|
||||
polySigma = 1.1;
|
||||
flags = 0;
|
||||
}
|
||||
|
||||
int numLevels;
|
||||
double pyrScale;
|
||||
bool fastPyramids;
|
||||
int winSize;
|
||||
int numIters;
|
||||
int polyN;
|
||||
double polySigma;
|
||||
int flags;
|
||||
|
||||
void operator ()(const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &s = Stream::Null());
|
||||
|
||||
void releaseMemory();
|
||||
|
||||
private:
|
||||
/* hidden */
|
||||
};
|
||||
|
||||
|
||||
|
||||
gpu::FarnebackOpticalFlow::operator ()
|
||||
--------------------------------------
|
||||
Computes a dense optical flow using Gunnar Farneback’s algorithm.
|
||||
|
||||
.. ocv:function:: void gpu::FarnebackOpticalFlow::operator ()(const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &s = Stream::Null())
|
||||
|
||||
:param frame0: First 8-bit gray-scale input image
|
||||
:param frame1: Second 8-bit gray-scale input image
|
||||
:param flowx: Flow horizontal component
|
||||
:param flowy: Flow vertical component
|
||||
:param s: Stream
|
||||
|
||||
.. seealso:: :ocv:func:`calcOpticalFlowFarneback`
|
||||
|
||||
|
||||
|
||||
gpu::FarnebackOpticalFlow::releaseMemory
|
||||
----------------------------------------
|
||||
Releases unused auxiliary memory buffers.
|
||||
|
||||
.. ocv:function:: void gpu::FarnebackOpticalFlow::releaseMemory()
|
||||
|
||||
|
||||
|
||||
gpu::PyrLKOpticalFlow
|
||||
---------------------
|
||||
.. ocv:class:: gpu::PyrLKOpticalFlow
|
||||
|
||||
Class used for calculating an optical flow. ::
|
||||
|
||||
class PyrLKOpticalFlow
|
||||
{
|
||||
public:
|
||||
PyrLKOpticalFlow();
|
||||
|
||||
void sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts,
|
||||
GpuMat& status, GpuMat* err = 0);
|
||||
|
||||
void dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, GpuMat* err = 0);
|
||||
|
||||
Size winSize;
|
||||
int maxLevel;
|
||||
int iters;
|
||||
bool useInitialFlow;
|
||||
|
||||
void releaseMemory();
|
||||
};
|
||||
|
||||
The class can calculate an optical flow for a sparse feature set or dense optical flow using the iterative Lucas-Kanade method with pyramids.
|
||||
|
||||
.. seealso:: :ocv:func:`calcOpticalFlowPyrLK`
|
||||
|
||||
|
||||
|
||||
gpu::PyrLKOpticalFlow::sparse
|
||||
-----------------------------
|
||||
Calculate an optical flow for a sparse feature set.
|
||||
|
||||
.. ocv:function:: void gpu::PyrLKOpticalFlow::sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts, GpuMat& status, GpuMat* err = 0)
|
||||
|
||||
:param prevImg: First 8-bit input image (supports both grayscale and color images).
|
||||
|
||||
:param nextImg: Second input image of the same size and the same type as ``prevImg`` .
|
||||
|
||||
:param prevPts: Vector of 2D points for which the flow needs to be found. It must be one row matrix with CV_32FC2 type.
|
||||
|
||||
:param nextPts: Output vector of 2D points (with single-precision floating-point coordinates) containing the calculated new positions of input features in the second image. When ``useInitialFlow`` is true, the vector must have the same size as in the input.
|
||||
|
||||
:param status: Output status vector (CV_8UC1 type). Each element of the vector is set to 1 if the flow for the corresponding features has been found. Otherwise, it is set to 0.
|
||||
|
||||
:param err: Output vector (CV_32FC1 type) that contains the difference between patches around the original and moved points or min eigen value if ``getMinEigenVals`` is checked. It can be NULL, if not needed.
|
||||
|
||||
.. seealso:: :ocv:func:`calcOpticalFlowPyrLK`
|
||||
|
||||
|
||||
|
||||
gpu::PyrLKOpticalFlow::dense
|
||||
-----------------------------
|
||||
Calculate dense optical flow.
|
||||
|
||||
.. ocv:function:: void gpu::PyrLKOpticalFlow::dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, GpuMat* err = 0)
|
||||
|
||||
:param prevImg: First 8-bit grayscale input image.
|
||||
|
||||
:param nextImg: Second input image of the same size and the same type as ``prevImg`` .
|
||||
|
||||
:param u: Horizontal component of the optical flow of the same size as input images, 32-bit floating-point, single-channel
|
||||
|
||||
:param v: Vertical component of the optical flow of the same size as input images, 32-bit floating-point, single-channel
|
||||
|
||||
:param err: Output vector (CV_32FC1 type) that contains the difference between patches around the original and moved points or min eigen value if ``getMinEigenVals`` is checked. It can be NULL, if not needed.
|
||||
|
||||
|
||||
|
||||
gpu::PyrLKOpticalFlow::releaseMemory
|
||||
------------------------------------
|
||||
Releases the memory held by the internal buffers.
|
||||
|
||||
.. ocv:function:: void gpu::PyrLKOpticalFlow::releaseMemory()
|
||||
|
||||
|
||||
|
||||
gpu::interpolateFrames
|
||||
----------------------
|
||||
Interpolates frames (images) using provided optical flow (displacement field).
|
||||
|
||||
.. ocv:function:: void gpu::interpolateFrames(const GpuMat& frame0, const GpuMat& frame1, const GpuMat& fu, const GpuMat& fv, const GpuMat& bu, const GpuMat& bv, float pos, GpuMat& newFrame, GpuMat& buf, Stream& stream = Stream::Null())
|
||||
|
||||
:param frame0: First frame (32-bit floating point images, single channel).
|
||||
|
||||
:param frame1: Second frame. Must have the same type and size as ``frame0`` .
|
||||
|
||||
:param fu: Forward horizontal displacement.
|
||||
|
||||
:param fv: Forward vertical displacement.
|
||||
|
||||
:param bu: Backward horizontal displacement.
|
||||
|
||||
:param bv: Backward vertical displacement.
|
||||
|
||||
:param pos: New frame position.
|
||||
|
||||
:param newFrame: Output image.
|
||||
|
||||
:param buf: Temporary buffer, will have width x 6*height size, CV_32FC1 type and contain 6 GpuMat: occlusion masks for first frame, occlusion masks for second, interpolated forward horizontal flow, interpolated forward vertical flow, interpolated backward horizontal flow, interpolated backward vertical flow.
|
||||
|
||||
:param stream: Stream for the asynchronous version.
|
||||
|
||||
|
||||
|
||||
.. [Brox2004] T. Brox, A. Bruhn, N. Papenberg, J. Weickert. *High accuracy optical flow estimation based on a theory for warping*. ECCV 2004.
|
310
modules/gpuoptflow/include/opencv2/gpuoptflow.hpp
Normal file
310
modules/gpuoptflow/include/opencv2/gpuoptflow.hpp
Normal file
@@ -0,0 +1,310 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_GPUOPTFLOW_HPP__
|
||||
#define __OPENCV_GPUOPTFLOW_HPP__
|
||||
|
||||
#include "opencv2/core/gpumat.hpp"
|
||||
|
||||
namespace cv { namespace gpu {
|
||||
|
||||
////////////////////////////////// Optical Flow //////////////////////////////////////////
|
||||
|
||||
class CV_EXPORTS BroxOpticalFlow
|
||||
{
|
||||
public:
|
||||
BroxOpticalFlow(float alpha_, float gamma_, float scale_factor_, int inner_iterations_, int outer_iterations_, int solver_iterations_) :
|
||||
alpha(alpha_), gamma(gamma_), scale_factor(scale_factor_),
|
||||
inner_iterations(inner_iterations_), outer_iterations(outer_iterations_), solver_iterations(solver_iterations_)
|
||||
{
|
||||
}
|
||||
|
||||
//! Compute optical flow
|
||||
//! frame0 - source frame (supports only CV_32FC1 type)
|
||||
//! frame1 - frame to track (with the same size and type as frame0)
|
||||
//! u - flow horizontal component (along x axis)
|
||||
//! v - flow vertical component (along y axis)
|
||||
void operator ()(const GpuMat& frame0, const GpuMat& frame1, GpuMat& u, GpuMat& v, Stream& stream = Stream::Null());
|
||||
|
||||
//! flow smoothness
|
||||
float alpha;
|
||||
|
||||
//! gradient constancy importance
|
||||
float gamma;
|
||||
|
||||
//! pyramid scale factor
|
||||
float scale_factor;
|
||||
|
||||
//! number of lagged non-linearity iterations (inner loop)
|
||||
int inner_iterations;
|
||||
|
||||
//! number of warping iterations (number of pyramid levels)
|
||||
int outer_iterations;
|
||||
|
||||
//! number of linear system solver iterations
|
||||
int solver_iterations;
|
||||
|
||||
GpuMat buf;
|
||||
};
|
||||
|
||||
class CV_EXPORTS PyrLKOpticalFlow
|
||||
{
|
||||
public:
|
||||
PyrLKOpticalFlow();
|
||||
|
||||
void sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts,
|
||||
GpuMat& status, GpuMat* err = 0);
|
||||
|
||||
void dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, GpuMat* err = 0);
|
||||
|
||||
void releaseMemory();
|
||||
|
||||
Size winSize;
|
||||
int maxLevel;
|
||||
int iters;
|
||||
bool useInitialFlow;
|
||||
|
||||
private:
|
||||
std::vector<GpuMat> prevPyr_;
|
||||
std::vector<GpuMat> nextPyr_;
|
||||
|
||||
GpuMat buf_;
|
||||
|
||||
GpuMat uPyr_[2];
|
||||
GpuMat vPyr_[2];
|
||||
};
|
||||
|
||||
class CV_EXPORTS FarnebackOpticalFlow
|
||||
{
|
||||
public:
|
||||
FarnebackOpticalFlow()
|
||||
{
|
||||
numLevels = 5;
|
||||
pyrScale = 0.5;
|
||||
fastPyramids = false;
|
||||
winSize = 13;
|
||||
numIters = 10;
|
||||
polyN = 5;
|
||||
polySigma = 1.1;
|
||||
flags = 0;
|
||||
}
|
||||
|
||||
int numLevels;
|
||||
double pyrScale;
|
||||
bool fastPyramids;
|
||||
int winSize;
|
||||
int numIters;
|
||||
int polyN;
|
||||
double polySigma;
|
||||
int flags;
|
||||
|
||||
void operator ()(const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &s = Stream::Null());
|
||||
|
||||
void releaseMemory()
|
||||
{
|
||||
frames_[0].release();
|
||||
frames_[1].release();
|
||||
pyrLevel_[0].release();
|
||||
pyrLevel_[1].release();
|
||||
M_.release();
|
||||
bufM_.release();
|
||||
R_[0].release();
|
||||
R_[1].release();
|
||||
blurredFrame_[0].release();
|
||||
blurredFrame_[1].release();
|
||||
pyramid0_.clear();
|
||||
pyramid1_.clear();
|
||||
}
|
||||
|
||||
private:
|
||||
void prepareGaussian(
|
||||
int n, double sigma, float *g, float *xg, float *xxg,
|
||||
double &ig11, double &ig03, double &ig33, double &ig55);
|
||||
|
||||
void setPolynomialExpansionConsts(int n, double sigma);
|
||||
|
||||
void updateFlow_boxFilter(
|
||||
const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat &flowy,
|
||||
GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]);
|
||||
|
||||
void updateFlow_gaussianBlur(
|
||||
const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat& flowy,
|
||||
GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]);
|
||||
|
||||
GpuMat frames_[2];
|
||||
GpuMat pyrLevel_[2], M_, bufM_, R_[2], blurredFrame_[2];
|
||||
std::vector<GpuMat> pyramid0_, pyramid1_;
|
||||
};
|
||||
|
||||
// Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method
|
||||
//
|
||||
// see reference:
|
||||
// [1] C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow".
|
||||
// [2] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation".
|
||||
class CV_EXPORTS OpticalFlowDual_TVL1_GPU
|
||||
{
|
||||
public:
|
||||
OpticalFlowDual_TVL1_GPU();
|
||||
|
||||
void operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy);
|
||||
|
||||
void collectGarbage();
|
||||
|
||||
/**
|
||||
* Time step of the numerical scheme.
|
||||
*/
|
||||
double tau;
|
||||
|
||||
/**
|
||||
* Weight parameter for the data term, attachment parameter.
|
||||
* This is the most relevant parameter, which determines the smoothness of the output.
|
||||
* The smaller this parameter is, the smoother the solutions we obtain.
|
||||
* It depends on the range of motions of the images, so its value should be adapted to each image sequence.
|
||||
*/
|
||||
double lambda;
|
||||
|
||||
/**
|
||||
* Weight parameter for (u - v)^2, tightness parameter.
|
||||
* It serves as a link between the attachment and the regularization terms.
|
||||
* In theory, it should have a small value in order to maintain both parts in correspondence.
|
||||
* The method is stable for a large range of values of this parameter.
|
||||
*/
|
||||
double theta;
|
||||
|
||||
/**
|
||||
* Number of scales used to create the pyramid of images.
|
||||
*/
|
||||
int nscales;
|
||||
|
||||
/**
|
||||
* Number of warpings per scale.
|
||||
* Represents the number of times that I1(x+u0) and grad( I1(x+u0) ) are computed per scale.
|
||||
* This is a parameter that assures the stability of the method.
|
||||
* It also affects the running time, so it is a compromise between speed and accuracy.
|
||||
*/
|
||||
int warps;
|
||||
|
||||
/**
|
||||
* Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time.
|
||||
* A small value will yield more accurate solutions at the expense of a slower convergence.
|
||||
*/
|
||||
double epsilon;
|
||||
|
||||
/**
|
||||
* Stopping criterion iterations number used in the numerical scheme.
|
||||
*/
|
||||
int iterations;
|
||||
|
||||
double scaleStep;
|
||||
|
||||
bool useInitialFlow;
|
||||
|
||||
private:
|
||||
void procOneScale(const GpuMat& I0, const GpuMat& I1, GpuMat& u1, GpuMat& u2);
|
||||
|
||||
std::vector<GpuMat> I0s;
|
||||
std::vector<GpuMat> I1s;
|
||||
std::vector<GpuMat> u1s;
|
||||
std::vector<GpuMat> u2s;
|
||||
|
||||
GpuMat I1x_buf;
|
||||
GpuMat I1y_buf;
|
||||
|
||||
GpuMat I1w_buf;
|
||||
GpuMat I1wx_buf;
|
||||
GpuMat I1wy_buf;
|
||||
|
||||
GpuMat grad_buf;
|
||||
GpuMat rho_c_buf;
|
||||
|
||||
GpuMat p11_buf;
|
||||
GpuMat p12_buf;
|
||||
GpuMat p21_buf;
|
||||
GpuMat p22_buf;
|
||||
|
||||
GpuMat diff_buf;
|
||||
GpuMat norm_buf;
|
||||
};
|
||||
|
||||
//! Calculates optical flow for 2 images using block matching algorithm */
|
||||
CV_EXPORTS void calcOpticalFlowBM(const GpuMat& prev, const GpuMat& curr,
|
||||
Size block_size, Size shift_size, Size max_range, bool use_previous,
|
||||
GpuMat& velx, GpuMat& vely, GpuMat& buf,
|
||||
Stream& stream = Stream::Null());
|
||||
|
||||
class CV_EXPORTS FastOpticalFlowBM
|
||||
{
|
||||
public:
|
||||
void operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy, int search_window = 21, int block_window = 7, Stream& s = Stream::Null());
|
||||
|
||||
private:
|
||||
GpuMat buffer;
|
||||
GpuMat extended_I0;
|
||||
GpuMat extended_I1;
|
||||
};
|
||||
|
||||
|
||||
//! Interpolate frames (images) using provided optical flow (displacement field).
|
||||
//! frame0 - frame 0 (32-bit floating point images, single channel)
|
||||
//! frame1 - frame 1 (the same type and size)
|
||||
//! fu - forward horizontal displacement
|
||||
//! fv - forward vertical displacement
|
||||
//! bu - backward horizontal displacement
|
||||
//! bv - backward vertical displacement
|
||||
//! pos - new frame position
|
||||
//! newFrame - new frame
|
||||
//! buf - temporary buffer, will have width x 6*height size, CV_32FC1 type and contain 6 GpuMat;
|
||||
//! occlusion masks 0, occlusion masks 1,
|
||||
//! interpolated forward flow 0, interpolated forward flow 1,
|
||||
//! interpolated backward flow 0, interpolated backward flow 1
|
||||
//!
|
||||
CV_EXPORTS void interpolateFrames(const GpuMat& frame0, const GpuMat& frame1,
|
||||
const GpuMat& fu, const GpuMat& fv,
|
||||
const GpuMat& bu, const GpuMat& bv,
|
||||
float pos, GpuMat& newFrame, GpuMat& buf,
|
||||
Stream& stream = Stream::Null());
|
||||
|
||||
// NOTE(review): undocumented in the original; u and v are const inputs, vertex and
// colors are non-const output arguments -- confirm their layout in the implementation.
CV_EXPORTS void createOpticalFlowNeedleMap(
    const GpuMat& u, const GpuMat& v,
    GpuMat& vertex, GpuMat& colors);
|
||||
|
||||
}} // namespace cv { namespace gpu {
|
||||
|
||||
#endif /* __OPENCV_GPUOPTFLOW_HPP__ */
|
47
modules/gpuoptflow/perf/perf_main.cpp
Normal file
47
modules/gpuoptflow/perf/perf_main.cpp
Normal file
@@ -0,0 +1,47 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "perf_precomp.hpp"
|
||||
|
||||
using namespace perf;
|
||||
|
||||
// NOTE(review): presumably expands to the main() of the gpuoptflow perf-test binary and
// evaluates printCudaInfo() at start-up -- confirm against the macro definition in the ts module.
CV_PERF_TEST_MAIN(gpuoptflow, printCudaInfo())
|
479
modules/gpuoptflow/perf/perf_optflow.cpp
Normal file
479
modules/gpuoptflow/perf/perf_optflow.cpp
Normal file
@@ -0,0 +1,479 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "perf_precomp.hpp"
|
||||
#include "opencv2/legacy.hpp"
|
||||
|
||||
using namespace std;
|
||||
using namespace testing;
|
||||
using namespace perf;
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// InterpolateFrames
|
||||
|
||||
// Pair of strings; the tests below use it for the paths of the two input frames.
typedef pair<string, string> pair_string;
|
||||
|
||||
// Test fixture named ImagePair whose single parameter is a pair_string.
DEF_PARAM_TEST_1(ImagePair, pair_string);
|
||||
|
||||
// Times cv::gpu::interpolateFrames. The forward and backward Brox flows it consumes are
// computed once, before the measured cycle. Only the GPU path exists.
PERF_TEST_P(ImagePair, Video_InterpolateFrames,
            Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
{
    // Factor passed to convertTo() together with CV_32FC1.
    const double intensityScale = 1.0 / 255.0;

    cv::Mat frame0 = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(frame0.empty());

    cv::Mat frame1 = readImage(GetParam().second, cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(frame1.empty());

    frame0.convertTo(frame0, CV_32FC1, intensityScale);
    frame1.convertTo(frame1, CV_32FC1, intensityScale);

    if (PERF_RUN_GPU())
    {
        // Brox parameters, in constructor order.
        const float broxAlpha = 0.197f;
        const float broxGamma = 50.0f;
        const float broxScaleFactor = 0.8f;
        const int broxInnerIterations = 10;
        const int broxOuterIterations = 77;
        const int broxSolverIterations = 10;

        const cv::gpu::GpuMat d_frame0(frame0);
        const cv::gpu::GpuMat d_frame1(frame1);

        cv::gpu::GpuMat d_forwardU;
        cv::gpu::GpuMat d_forwardV;
        cv::gpu::GpuMat d_backwardU;
        cv::gpu::GpuMat d_backwardV;

        cv::gpu::BroxOpticalFlow d_flow(broxAlpha, broxGamma, broxScaleFactor,
                                        broxInnerIterations, broxOuterIterations, broxSolverIterations);

        // Forward flow (frame0 -> frame1), then backward flow (frame1 -> frame0).
        d_flow(d_frame0, d_frame1, d_forwardU, d_forwardV);
        d_flow(d_frame1, d_frame0, d_backwardU, d_backwardV);

        // The name newFrame is kept as in the original because it is handed to GPU_SANITY_CHECK.
        cv::gpu::GpuMat newFrame;
        cv::gpu::GpuMat d_scratch;

        TEST_CYCLE() cv::gpu::interpolateFrames(d_frame0, d_frame1, d_forwardU, d_forwardV, d_backwardU, d_backwardV, 0.5f, newFrame, d_scratch);

        GPU_SANITY_CHECK(newFrame);
    }
    else
    {
        FAIL_NO_CPU();
    }
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// CreateOpticalFlowNeedleMap
|
||||
|
||||
// Times cv::gpu::createOpticalFlowNeedleMap on a Brox flow field that is computed once,
// before the measured cycle. Only the GPU path exists.
PERF_TEST_P(ImagePair, Video_CreateOpticalFlowNeedleMap,
            Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
{
    // Factor passed to convertTo() together with CV_32FC1.
    const double intensityScale = 1.0 / 255.0;

    cv::Mat frame0 = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(frame0.empty());

    cv::Mat frame1 = readImage(GetParam().second, cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(frame1.empty());

    frame0.convertTo(frame0, CV_32FC1, intensityScale);
    frame1.convertTo(frame1, CV_32FC1, intensityScale);

    if (PERF_RUN_GPU())
    {
        // Brox parameters, in constructor order.
        const float broxAlpha = 0.197f;
        const float broxGamma = 50.0f;
        const float broxScaleFactor = 0.8f;
        const int broxInnerIterations = 10;
        const int broxOuterIterations = 77;
        const int broxSolverIterations = 10;

        const cv::gpu::GpuMat d_frame0(frame0);
        const cv::gpu::GpuMat d_frame1(frame1);

        cv::gpu::GpuMat u;
        cv::gpu::GpuMat v;

        cv::gpu::BroxOpticalFlow d_flow(broxAlpha, broxGamma, broxScaleFactor,
                                        broxInnerIterations, broxOuterIterations, broxSolverIterations);

        d_flow(d_frame0, d_frame1, u, v);

        // The names vertex and colors are kept as in the original because they are handed to GPU_SANITY_CHECK.
        cv::gpu::GpuMat vertex;
        cv::gpu::GpuMat colors;

        TEST_CYCLE() cv::gpu::createOpticalFlowNeedleMap(u, v, vertex, colors);

        GPU_SANITY_CHECK(vertex);
        GPU_SANITY_CHECK(colors);
    }
    else
    {
        FAIL_NO_CPU();
    }
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// BroxOpticalFlow
|
||||
|
||||
// Times cv::gpu::BroxOpticalFlow on a pair of grayscale frames converted to CV_32FC1.
// Only the GPU path exists.
PERF_TEST_P(ImagePair, Video_BroxOpticalFlow,
            Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
{
    declare.time(300);

    // Factor passed to convertTo() together with CV_32FC1.
    const double intensityScale = 1.0 / 255.0;

    cv::Mat frame0 = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(frame0.empty());

    cv::Mat frame1 = readImage(GetParam().second, cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(frame1.empty());

    frame0.convertTo(frame0, CV_32FC1, intensityScale);
    frame1.convertTo(frame1, CV_32FC1, intensityScale);

    if (PERF_RUN_GPU())
    {
        // Brox parameters, in constructor order.
        const float broxAlpha = 0.197f;
        const float broxGamma = 50.0f;
        const float broxScaleFactor = 0.8f;
        const int broxInnerIterations = 10;
        const int broxOuterIterations = 77;
        const int broxSolverIterations = 10;

        const cv::gpu::GpuMat d_frame0(frame0);
        const cv::gpu::GpuMat d_frame1(frame1);

        // The names u and v are kept as in the original because they are handed to GPU_SANITY_CHECK.
        cv::gpu::GpuMat u;
        cv::gpu::GpuMat v;

        cv::gpu::BroxOpticalFlow d_flow(broxAlpha, broxGamma, broxScaleFactor,
                                        broxInnerIterations, broxOuterIterations, broxSolverIterations);

        TEST_CYCLE() d_flow(d_frame0, d_frame1, u, v);

        GPU_SANITY_CHECK(u);
        GPU_SANITY_CHECK(v);
    }
    else
    {
        FAIL_NO_CPU();
    }
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// PyrLKOpticalFlowSparse
|
||||
|
||||
// Parameter tuple: (image pair, use grayscale input, number of points, window size, pyramid levels, iterations).
DEF_PARAM_TEST(ImagePair_Gray_NPts_WinSz_Levels_Iters, pair_string, bool, int, int, int, int);

// Perf test: sparse pyramidal Lucas-Kanade tracking of goodFeaturesToTrack corners,
// run either through cv::gpu::PyrLKOpticalFlow::sparse or cv::calcOpticalFlowPyrLK.
// NOTE(review): `nextPts` / `status` are passed to the sanity-check macros, which may
// key reference data on the variable name — keep these names; confirm in ts.
PERF_TEST_P(ImagePair_Gray_NPts_WinSz_Levels_Iters, Video_PyrLKOpticalFlowSparse,
            Combine(Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")),
                    Bool(),
                    Values(8000),
                    Values(21),
                    Values(1, 3),
                    Values(1, 30)))
{
    declare.time(20.0);

    const pair_string imagePair = GET_PARAM(0);
    const bool useGray = GET_PARAM(1);
    const int points = GET_PARAM(2);
    const int winSize = GET_PARAM(3);
    const int levels = GET_PARAM(4);
    const int iters = GET_PARAM(5);

    // Both frames are read with the same mode, chosen by the useGray parameter.
    const int readMode = useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR;

    const cv::Mat frame0 = readImage(imagePair.first, readMode);
    ASSERT_FALSE(frame0.empty());

    const cv::Mat frame1 = readImage(imagePair.second, readMode);
    ASSERT_FALSE(frame1.empty());

    // Corner detection always runs on a single-channel version of the first frame.
    cv::Mat gray_frame;
    if (!useGray)
        cv::cvtColor(frame0, gray_frame, cv::COLOR_BGR2GRAY);
    else
        gray_frame = frame0;

    cv::Mat pts;
    cv::goodFeaturesToTrack(gray_frame, pts, points, 0.01, 0.0);

    if (!PERF_RUN_GPU())
    {
        cv::Mat nextPts;
        cv::Mat status;

        TEST_CYCLE()
        {
            const cv::Size window(winSize, winSize);
            const cv::TermCriteria criteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, iters, 0.01);

            cv::calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, cv::noArray(),
                                     window, levels - 1, criteria);
        }

        CPU_SANITY_CHECK(nextPts);
        CPU_SANITY_CHECK(status);
    }
    else
    {
        // The point list is reshaped to 2 channels, 1 row before upload.
        const cv::gpu::GpuMat gpuPts(pts.reshape(2, 1));

        cv::gpu::PyrLKOpticalFlow tracker;
        tracker.winSize = cv::Size(winSize, winSize);
        tracker.maxLevel = levels - 1;
        tracker.iters = iters;

        const cv::gpu::GpuMat gpuFrame0(frame0);
        const cv::gpu::GpuMat gpuFrame1(frame1);
        cv::gpu::GpuMat nextPts;
        cv::gpu::GpuMat status;

        TEST_CYCLE()
        {
            tracker.sparse(gpuFrame0, gpuFrame1, gpuPts, nextPts, status);
        }

        GPU_SANITY_CHECK(nextPts);
        GPU_SANITY_CHECK(status);
    }
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// PyrLKOpticalFlowDense
|
||||
|
||||
// Parameter tuple: (image pair, window size, pyramid levels, iterations).
DEF_PARAM_TEST(ImagePair_WinSz_Levels_Iters, pair_string, int, int, int);

// Perf test: dense pyramidal Lucas-Kanade flow via cv::gpu::PyrLKOpticalFlow::dense.
// The non-GPU path only reports FAIL_NO_CPU().
PERF_TEST_P(ImagePair_WinSz_Levels_Iters, Video_PyrLKOpticalFlowDense,
            Combine(Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")),
                    Values(3, 5, 7, 9, 13, 17, 21),
                    Values(1, 3),
                    Values(1, 10)))
{
    declare.time(30);

    const pair_string imagePair = GET_PARAM(0);
    const int winSize = GET_PARAM(1);
    const int levels = GET_PARAM(2);
    const int iters = GET_PARAM(3);

    const cv::Mat frame0 = readImage(imagePair.first, cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(frame0.empty());

    const cv::Mat frame1 = readImage(imagePair.second, cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(frame1.empty());

    if (!PERF_RUN_GPU())
    {
        FAIL_NO_CPU();
    }
    else
    {
        const cv::gpu::GpuMat gpuFrame0(frame0);
        const cv::gpu::GpuMat gpuFrame1(frame1);
        cv::gpu::GpuMat u;
        cv::gpu::GpuMat v;

        cv::gpu::PyrLKOpticalFlow tracker;
        tracker.winSize = cv::Size(winSize, winSize);
        tracker.maxLevel = levels - 1;   // levels counts pyramid layers, maxLevel is the top index
        tracker.iters = iters;

        TEST_CYCLE()
        {
            tracker.dense(gpuFrame0, gpuFrame1, u, v);
        }

        GPU_SANITY_CHECK(u);
        GPU_SANITY_CHECK(v);
    }
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// FarnebackOpticalFlow
|
||||
|
||||
// Perf test: Farneback dense optical flow, GPU (cv::gpu::FarnebackOpticalFlow) versus
// CPU (cv::calcOpticalFlowFarneback), both driven by the same parameter set.
PERF_TEST_P(ImagePair, Video_FarnebackOpticalFlow,
            Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
{
    declare.time(10);

    const pair_string imagePair = GetParam();

    const cv::Mat frame0 = readImage(imagePair.first, cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(frame0.empty());

    const cv::Mat frame1 = readImage(imagePair.second, cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(frame1.empty());

    // Algorithm settings shared by the GPU and CPU paths.
    const int numLevels = 5;
    const double pyrScale = 0.5;
    const int winSize = 13;
    const int numIters = 10;
    const int polyN = 5;
    const double polySigma = 1.1;
    const int flags = 0;

    if (!PERF_RUN_GPU())
    {
        cv::Mat flow;

        TEST_CYCLE()
        {
            cv::calcOpticalFlowFarneback(frame0, frame1, flow, pyrScale, numLevels, winSize, numIters, polyN, polySigma, flags);
        }

        CPU_SANITY_CHECK(flow);
    }
    else
    {
        const cv::gpu::GpuMat gpuFrame0(frame0);
        const cv::gpu::GpuMat gpuFrame1(frame1);
        cv::gpu::GpuMat u;
        cv::gpu::GpuMat v;

        cv::gpu::FarnebackOpticalFlow farneback;
        farneback.numLevels = numLevels;
        farneback.pyrScale = pyrScale;
        farneback.winSize = winSize;
        farneback.numIters = numIters;
        farneback.polyN = polyN;
        farneback.polySigma = polySigma;
        farneback.flags = flags;

        TEST_CYCLE()
        {
            farneback(gpuFrame0, gpuFrame1, u, v);
        }

        GPU_SANITY_CHECK(u, 1e-4);
        GPU_SANITY_CHECK(v, 1e-4);
    }
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// OpticalFlowDual_TVL1
|
||||
|
||||
// Perf test: Dual TV-L1 optical flow, GPU (cv::gpu::OpticalFlowDual_TVL1_GPU with its
// default settings) versus the CPU algorithm from cv::createOptFlow_DualTVL1().
PERF_TEST_P(ImagePair, Video_OpticalFlowDual_TVL1,
            Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
{
    declare.time(20);

    const pair_string imagePair = GetParam();

    const cv::Mat frame0 = readImage(imagePair.first, cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(frame0.empty());

    const cv::Mat frame1 = readImage(imagePair.second, cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(frame1.empty());

    if (!PERF_RUN_GPU())
    {
        cv::Mat flow;

        // CPU algorithm with three of its named parameters overridden.
        cv::Ptr<cv::DenseOpticalFlow> tvl1 = cv::createOptFlow_DualTVL1();
        tvl1->set("medianFiltering", 1);
        tvl1->set("innerIterations", 1);
        tvl1->set("outerIterations", 300);

        TEST_CYCLE()
        {
            tvl1->calc(frame0, frame1, flow);
        }

        CPU_SANITY_CHECK(flow);
    }
    else
    {
        const cv::gpu::GpuMat gpuFrame0(frame0);
        const cv::gpu::GpuMat gpuFrame1(frame1);
        cv::gpu::GpuMat u;
        cv::gpu::GpuMat v;

        cv::gpu::OpticalFlowDual_TVL1_GPU tvl1;

        TEST_CYCLE()
        {
            tvl1(gpuFrame0, gpuFrame1, u, v);
        }

        GPU_SANITY_CHECK(u, 1e-2);
        GPU_SANITY_CHECK(v, 1e-2);
    }
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// OpticalFlowBM
|
||||
|
||||
// CPU reference path for the block-matching perf test.
// Allocates velx / vely as CV_32FC1 matrices sized from curr, bSize and shiftSize,
// then forwards all arguments to the C function cvCalcOpticalFlowBM.
//   prev, curr   - input frames
//   bSize        - block size
//   shiftSize    - step between neighbouring blocks
//   maxRange     - passed through unchanged
//   usePrevious  - passed through unchanged
//   velx, vely   - outputs, (re)allocated here
void calcOpticalFlowBM(const cv::Mat& prev, const cv::Mat& curr,
                       cv::Size bSize, cv::Size shiftSize, cv::Size maxRange, int usePrevious,
                       cv::Mat& velx, cv::Mat& vely)
{
    const int flowCols = (curr.cols - bSize.width + shiftSize.width) / shiftSize.width;
    const int flowRows = (curr.rows - bSize.height + shiftSize.height) / shiftSize.height;
    const cv::Size flowSize(flowCols, flowRows);

    velx.create(flowSize, CV_32FC1);
    vely.create(flowSize, CV_32FC1);

    // CvMat headers for the C API; the output headers are taken after create().
    CvMat prevHeader = prev;
    CvMat currHeader = curr;
    CvMat velxHeader = velx;
    CvMat velyHeader = vely;

    cvCalcOpticalFlowBM(&prevHeader, &currHeader, bSize, shiftSize, maxRange, usePrevious,
                        &velxHeader, &velyHeader);
}
|
||||
|
||||
// Perf test: block-matching optical flow, cv::gpu::calcOpticalFlowBM versus the
// file-local CPU wrapper calcOpticalFlowBM, with identical block / shift / range sizes.
PERF_TEST_P(ImagePair, Video_OpticalFlowBM,
            Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
{
    declare.time(400);

    const pair_string imagePair = GetParam();

    const cv::Mat frame0 = readImage(imagePair.first, cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(frame0.empty());

    const cv::Mat frame1 = readImage(imagePair.second, cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(frame1.empty());

    const cv::Size blockSize(16, 16);
    const cv::Size shiftSize(1, 1);
    const cv::Size maxRange(16, 16);

    if (!PERF_RUN_GPU())
    {
        cv::Mat u, v;

        TEST_CYCLE()
        {
            calcOpticalFlowBM(frame0, frame1, blockSize, shiftSize, maxRange, false, u, v);
        }

        CPU_SANITY_CHECK(u);
        CPU_SANITY_CHECK(v);
    }
    else
    {
        const cv::gpu::GpuMat gpuFrame0(frame0);
        const cv::gpu::GpuMat gpuFrame1(frame1);
        cv::gpu::GpuMat u, v, scratch;

        TEST_CYCLE()
        {
            cv::gpu::calcOpticalFlowBM(gpuFrame0, gpuFrame1, blockSize, shiftSize, maxRange, false, u, v, scratch);
        }

        GPU_SANITY_CHECK(u);
        GPU_SANITY_CHECK(v);
    }
}
|
||||
|
||||
// Perf test: cv::gpu::FastOpticalFlowBM on one grayscale frame pair.
// The non-GPU path only reports FAIL_NO_CPU().
PERF_TEST_P(ImagePair, Video_FastOpticalFlowBM,
            Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
{
    declare.time(400);

    const pair_string imagePair = GetParam();

    const cv::Mat frame0 = readImage(imagePair.first, cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(frame0.empty());

    const cv::Mat frame1 = readImage(imagePair.second, cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(frame1.empty());

    // shiftSize is declared for symmetry with the block-matching test but is not used below.
    const cv::Size blockSize(16, 16);
    const cv::Size shiftSize(1, 1);
    const cv::Size maxRange(16, 16);

    if (!PERF_RUN_GPU())
    {
        FAIL_NO_CPU();
    }
    else
    {
        const cv::gpu::GpuMat gpuFrame0(frame0);
        const cv::gpu::GpuMat gpuFrame1(frame1);
        cv::gpu::GpuMat u, v;

        cv::gpu::FastOpticalFlowBM fastBM;

        TEST_CYCLE()
        {
            // Only the widths of the range and block sizes are handed to the algorithm.
            fastBM(gpuFrame0, gpuFrame1, u, v, maxRange.width, blockSize.width);
        }

        GPU_SANITY_CHECK(u, 2);
        GPU_SANITY_CHECK(v, 2);
    }
}
|
43
modules/gpuoptflow/perf/perf_precomp.cpp
Normal file
43
modules/gpuoptflow/perf/perf_precomp.cpp
Normal file
@@ -0,0 +1,43 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "perf_precomp.hpp"
|
66
modules/gpuoptflow/perf/perf_precomp.hpp
Normal file
66
modules/gpuoptflow/perf/perf_precomp.hpp
Normal file
@@ -0,0 +1,66 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifdef __GNUC__
|
||||
# pragma GCC diagnostic ignored "-Wmissing-declarations"
|
||||
# if defined __clang__ || defined __APPLE__
|
||||
# pragma GCC diagnostic ignored "-Wmissing-prototypes"
|
||||
# pragma GCC diagnostic ignored "-Wextra"
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef __OPENCV_PERF_PRECOMP_HPP__
|
||||
#define __OPENCV_PERF_PRECOMP_HPP__
|
||||
|
||||
#include "opencv2/ts.hpp"
|
||||
#include "opencv2/ts/gpu_perf.hpp"
|
||||
|
||||
#include "opencv2/gpuoptflow.hpp"
|
||||
|
||||
#include "opencv2/video.hpp"
|
||||
#include "opencv2/legacy.hpp"
|
||||
|
||||
#ifdef GTEST_CREATE_SHARED_LIBRARY
|
||||
#error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
|
||||
#endif
|
||||
|
||||
#endif
|
414
modules/gpuoptflow/src/cuda/optflowbm.cu
Normal file
414
modules/gpuoptflow/src/cuda/optflowbm.cu
Normal file
@@ -0,0 +1,414 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/limits.hpp"
|
||||
#include "opencv2/core/cuda/functional.hpp"
|
||||
#include "opencv2/core/cuda/reduce.hpp"
|
||||
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::cudev;
|
||||
|
||||
namespace optflowbm
{
    // Point-filtered, clamp-addressed 2D textures through which the kernel reads both frames.
    texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_prev(false, cudaFilterModePoint, cudaAddressModeClamp);
    texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_curr(false, cudaFilterModePoint, cudaAddressModeClamp);

    // Sum of absolute differences between the blockSize block at (X1, Y1) in tex_prev
    // and the block at (X2, Y2) in tex_curr.
    __device__ int cmpBlocks(int X1, int Y1, int X2, int Y2, int2 blockSize)
    {
        int sad = 0;

        for (int dy = 0; dy < blockSize.y; ++dy)
        {
            for (int dx = 0; dx < blockSize.x; ++dx)
            {
                const int prevPix = tex2D(tex_prev, X1 + dx, Y1 + dy);
                const int currPix = tex2D(tex_curr, X2 + dx, Y2 + dy);

                sad += ::abs(prevPix - currPix);
            }
        }

        return sad;
    }

    // True when (x, y) lies inside [0, maxX] x [0, maxY] (both bounds inclusive).
    __device__ __forceinline__ bool insideSearchArea(int x, int y, int maxX, int maxY)
    {
        return 0 <= x && x <= maxX && 0 <= y && y <= maxY;
    }

    // One thread per element of velx / vely.
    //  - Starts from the previous velocity (rounded) when usePrevious is set, otherwise from zero.
    //  - If the starting candidate's distance exceeds acceptLevel, walks the ss[] offset list:
    //    the first candidate below acceptLevel wins immediately; otherwise the minimum-distance
    //    candidates are tracked and ties are averaged.
    //  - If the tracked distance is still above escapeLevel, the starting offset is restored.
    __global__ void calcOptFlowBM(PtrStepSzf velx, PtrStepf vely, const int2 blockSize, const int2 shiftSize, const bool usePrevious,
                                  const int maxX, const int maxY, const int acceptLevel, const int escapeLevel,
                                  const short2* ss, const int ssCount)
    {
        const int col = blockIdx.x * blockDim.x + threadIdx.x;
        const int row = blockIdx.y * blockDim.y + threadIdx.y;

        if (row >= velx.rows || col >= velx.cols)
            return;

        // Top-left corner of this thread's block in the previous frame.
        const int X1 = col * shiftSize.x;
        const int Y1 = row * shiftSize.y;

        int offX = 0;
        int offY = 0;
        if (usePrevious)
        {
            offX = __float2int_rn(velx(row, col));
            offY = __float2int_rn(vely(row, col));
        }

        int dist = numeric_limits<int>::max();

        if (insideSearchArea(X1 + offX, Y1 + offY, maxX, maxY))
            dist = cmpBlocks(X1, Y1, X1 + offX, Y1 + offY, blockSize);

        int countMin = 1;
        int sumx = offX;
        int sumy = offY;

        if (dist > acceptLevel)
        {
            // do brute-force search
            for (int k = 0; k < ssCount; ++k)
            {
                const short2 step = ss[k];

                const int dx = offX + step.x;
                const int dy = offY + step.y;

                const int X2 = X1 + dx;
                const int Y2 = Y1 + dy;

                if (!insideSearchArea(X2, Y2, maxX, maxY))
                    continue;

                const int tmpDist = cmpBlocks(X1, Y1, X2, Y2, blockSize);

                if (tmpDist < acceptLevel)
                {
                    // good enough: take this candidate and stop searching
                    sumx = dx;
                    sumy = dy;
                    countMin = 1;
                    break;
                }

                if (tmpDist < dist)
                {
                    // new strict minimum
                    dist = tmpDist;
                    sumx = dx;
                    sumy = dy;
                    countMin = 1;
                }
                else if (tmpDist == dist)
                {
                    // tie with the current minimum: accumulate for averaging
                    sumx += dx;
                    sumy += dy;
                    countMin++;
                }
            }

            if (dist > escapeLevel)
            {
                sumx = offX;
                sumy = offY;
                countMin = 1;
            }
        }

        velx(row, col) = static_cast<float>(sumx) / countMin;
        vely(row, col) = static_cast<float>(sumy) / countMin;
    }

    // Host launcher: binds both frames to the textures, launches calcOptFlowBM with
    // 32x8 thread blocks on the given stream, and synchronizes the device when the
    // stream handle is 0.
    void calc(PtrStepSzb prev, PtrStepSzb curr, PtrStepSzf velx, PtrStepSzf vely, int2 blockSize, int2 shiftSize, bool usePrevious,
              int maxX, int maxY, int acceptLevel, int escapeLevel, const short2* ss, int ssCount, cudaStream_t stream)
    {
        bindTexture(&tex_prev, prev);
        bindTexture(&tex_curr, curr);

        const dim3 threads(32, 8);
        const dim3 blocks(divUp(velx.cols, threads.x), divUp(vely.rows, threads.y));

        calcOptFlowBM<<<blocks, threads, 0, stream>>>(velx, vely, blockSize, shiftSize, usePrevious,
                                                      maxX, maxY, acceptLevel, escapeLevel, ss, ssCount);
        cudaSafeCall( cudaGetLastError() );

        const bool onNullStream = (stream == 0);
        if (onNullStream)
            cudaSafeCall( cudaDeviceSynchronize() );
    }
}
|
||||
|
||||
/////////////////////////////////////////////////////////
|
||||
// Fast approximate version
|
||||
|
||||
namespace optflowbm_fast
{
    enum
    {
        CTA_SIZE = 128,      // threads per block (see the launch in calc())

        TILE_COLS = 128,     // image tile handled by one thread block
        TILE_ROWS = 32,

        STRIDE = CTA_SIZE    // step used when a block's threads sweep the search window
    };

    // Absolute difference of two pixel values.
    template <typename T> __device__ __forceinline__ int calcDist(T a, T b)
    {
        return ::abs(a - b);
    }

    // Functor executed by every thread block. For each pixel of the block's tile it keeps,
    // for every displacement of the search window, a block distance (dist_sums, dynamic
    // shared memory) plus per-column partial sums (col_sums / up_col_sums, carved out of
    // `buffer`) that are updated incrementally while the tile is scanned.
    template <class T> struct FastOptFlowBM
    {
        int search_radius;
        int block_radius;

        int search_window;
        int block_window;

        PtrStepSz<T> I0;
        PtrStep<T> I1;

        mutable PtrStepi buffer;

        FastOptFlowBM(int search_window_, int block_window_,
                      PtrStepSz<T> I0_, PtrStepSz<T> I1_,
                      PtrStepi buffer_) :
            search_radius(search_window_ / 2), block_radius(block_window_ / 2),
            search_window(search_window_), block_window(block_window_),
            I0(I0_), I1(I1_),
            buffer(buffer_)
        {
        }

        // Distance accumulated over one vertical strip of 2 * block_radius + 1 pixels:
        // rows ay-block_radius..ay+block_radius of column ax in I0 against
        // rows by-block_radius..by+block_radius of column bx in I1.
        __device__ __forceinline__ int columnDist(int ay, int ax, int by, int bx) const
        {
            int acc = 0;

            for (int ty = -block_radius; ty <= block_radius; ++ty)
                acc += calcDist(I0(ay + ty, ax), I1(by + ty, bx));

            return acc;
        }

        // Full recomputation of every sum for pixel (i, j); used at the first column of a tile row.
        __device__ __forceinline__ void initSums_BruteForce(int i, int j, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const
        {
            const int windowArea = search_window * search_window;

            for (int index = threadIdx.x; index < windowArea; index += STRIDE)
            {
                for (int slot = 0; slot < block_window; ++slot)
                    col_sums(slot, index) = 0;

                // displacement encoded by `index`
                const int sy = index / search_window;
                const int sx = index - sy * search_window;

                const int by = i + sy - search_radius;
                const int bx = j + sx - search_radius;

                int total = 0;

                for (int tx = -block_radius; tx <= block_radius; ++tx)
                {
                    const int strip = columnDist(i, j + tx, by, bx + tx);

                    col_sums(tx + block_radius, index) = strip;
                    total += strip;
                }

                dist_sums[index] = total;

                up_col_sums(j, index) = col_sums(block_window - 1, index);
            }
        }

        // Slide one pixel to the right within the first tile row: compute the incoming
        // column from scratch and swap it for the column stored in slot `first`.
        __device__ __forceinline__ void shiftRight_FirstRow(int i, int j, int first, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const
        {
            const int windowArea = search_window * search_window;
            const int ax = j + block_radius;

            for (int index = threadIdx.x; index < windowArea; index += STRIDE)
            {
                const int sy = index / search_window;
                const int sx = index - sy * search_window;

                const int by = i + sy - search_radius;
                const int bx = j + sx - search_radius + block_radius;

                const int incoming = columnDist(i, ax, by, bx);

                dist_sums[index] += incoming - col_sums(first, index);

                col_sums(first, index) = incoming;
                up_col_sums(j, index) = incoming;
            }
        }

        // Slide one pixel to the right in any later tile row: the incoming column is derived
        // from the value stored in up_col_sums(j, index) by adding the new bottom pixel pair
        // and removing the old top pixel pair.
        __device__ __forceinline__ void shiftRight_UpSums(int i, int j, int first, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const
        {
            const int windowArea = search_window * search_window;
            const int ax = j + block_radius;

            const T a_up = I0(i - block_radius - 1, ax);
            const T a_down = I0(i + block_radius, ax);

            for (int index = threadIdx.x; index < windowArea; index += STRIDE)
            {
                const int sy = index / search_window;
                const int sx = index - sy * search_window;

                const int by = i + sy - search_radius;
                const int bx = j + sx - search_radius + block_radius;

                const T b_up = I1(by - block_radius - 1, bx);
                const T b_down = I1(by + block_radius, bx);

                const int incoming = up_col_sums(j, index) + calcDist(a_down, b_down) - calcDist(a_up, b_up);

                dist_sums[index] += incoming - col_sums(first, index);
                col_sums(first, index) = incoming;
                up_col_sums(j, index) = incoming;
            }
        }

        // Picks the displacement with the smallest distance: each thread scans its share of
        // dist_sums, reduceKeyVal combines the per-thread results, and thread 0 writes the
        // displacement (relative to the window centre) to velx / vely.
        __device__ __forceinline__ void convolve_window(int i, int j, const int* dist_sums, float& velx, float& vely) const
        {
            const int windowArea = search_window * search_window;

            int bestDist = numeric_limits<int>::max();
            int bestInd = -1;

            for (int index = threadIdx.x; index < windowArea; index += STRIDE)
            {
                const int candidate = dist_sums[index];

                if (candidate < bestDist)
                {
                    bestDist = candidate;
                    bestInd = index;
                }
            }

            __shared__ int cta_dist_buffer[CTA_SIZE];
            __shared__ int cta_ind_buffer[CTA_SIZE];

            reduceKeyVal<CTA_SIZE>(cta_dist_buffer, bestDist, cta_ind_buffer, bestInd, threadIdx.x, less<int>());

            if (threadIdx.x == 0)
            {
                const int sy = bestInd / search_window;
                const int sx = bestInd - sy * search_window;

                velx = sx - search_radius;
                vely = sy - search_radius;
            }
        }

        // Block entry point: scans the block's tile row by row, left to right, choosing the
        // cheapest update for each pixel and then selecting the best displacement.
        __device__ __forceinline__ void operator()(PtrStepf velx, PtrStepf vely) const
        {
            // tile bounds, clipped to the image
            const int tileX0 = blockIdx.x * TILE_COLS;
            const int tileY0 = blockIdx.y * TILE_ROWS;

            const int tileX1 = ::min(tileX0 + TILE_COLS, I0.cols);
            const int tileY1 = ::min(tileY0 + TILE_ROWS, I0.rows);

            // Per-block views into the shared scratch buffer.
            PtrStepi col_sums;
            col_sums.data = buffer.ptr(I0.cols + blockIdx.x * block_window) + blockIdx.y * search_window * search_window;
            col_sums.step = buffer.step;

            PtrStepi up_col_sums;
            up_col_sums.data = buffer.data + blockIdx.y * search_window * search_window;
            up_col_sums.step = buffer.step;

            extern __shared__ int dist_sums[]; //search_window * search_window

            // slot of col_sums that is replaced by the next right shift
            int first = 0;

            for (int i = tileY0; i < tileY1; ++i)
            {
                const bool topRowOfTile = (i == tileY0);

                for (int j = tileX0; j < tileX1; ++j)
                {
                    __syncthreads();

                    if (j != tileX0)
                    {
                        if (topRowOfTile)
                            shiftRight_FirstRow(i, j, first, dist_sums, col_sums, up_col_sums);
                        else
                            shiftRight_UpSums(i, j, first, dist_sums, col_sums, up_col_sums);

                        first = (first + 1) % block_window;
                    }
                    else
                    {
                        initSums_BruteForce(i, j, dist_sums, col_sums, up_col_sums);
                        first = 0;
                    }

                    __syncthreads();

                    convolve_window(i, j, dist_sums, velx(i, j), vely(i, j));
                }
            }
        }

    };

    // Thin kernel wrapper that invokes the functor.
    template<typename T> __global__ void optflowbm_fast_kernel(const FastOptFlowBM<T> fbm, PtrStepf velx, PtrStepf vely)
    {
        fbm(velx, vely);
    }

    // Reports the scratch-buffer dimensions for a src_cols x src_rows image, using the
    // same tile grid that calc() launches.
    void get_buffer_size(int src_cols, int src_rows, int search_window, int block_window, int& buffer_cols, int& buffer_rows)
    {
        const unsigned int tilesX = divUp(src_cols, TILE_COLS);
        const unsigned int tilesY = divUp(src_rows, TILE_ROWS);

        buffer_cols = search_window * search_window * tilesY;
        buffer_rows = src_cols + block_window * tilesX;
    }

    // Host launcher: one CTA_SIZE-thread block per tile, with search_window^2 ints of
    // dynamic shared memory; synchronizes the device when the stream handle is 0.
    template <typename T>
    void calc(PtrStepSzb I0, PtrStepSzb I1, PtrStepSzf velx, PtrStepSzf vely, PtrStepi buffer, int search_window, int block_window, cudaStream_t stream)
    {
        FastOptFlowBM<T> matcher(search_window, block_window, I0, I1, buffer);

        const dim3 threads(CTA_SIZE, 1);
        const dim3 tiles(divUp(I0.cols, TILE_COLS), divUp(I0.rows, TILE_ROWS));

        const size_t sharedBytes = search_window * search_window * sizeof(int);

        optflowbm_fast_kernel<<<tiles, threads, sharedBytes, stream>>>(matcher, velx, vely);
        cudaSafeCall ( cudaGetLastError () );

        const bool onNullStream = (stream == 0);
        if (onNullStream)
            cudaSafeCall( cudaDeviceSynchronize() );
    }

    template void calc<uchar>(PtrStepSzb I0, PtrStepSzb I1, PtrStepSzf velx, PtrStepSzf vely, PtrStepi buffer, int search_window, int block_window, cudaStream_t stream);
}
|
||||
|
||||
#endif // !defined CUDA_DISABLER
|
220
modules/gpuoptflow/src/cuda/optical_flow.cu
Normal file
220
modules/gpuoptflow/src/cuda/optical_flow.cu
Normal file
@@ -0,0 +1,220 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
namespace optical_flow
|
||||
{
|
||||
#define NEEDLE_MAP_SCALE 16
|
||||
#define NUM_VERTS_PER_ARROW 6
|
||||
|
||||
// Averages the flow fields u and v over NEEDLE_MAP_SCALE x NEEDLE_MAP_SCALE cells.
//
// Launch layout (see NeedleMapAverage_gpu): one block per output cell, with
// blockDim.x == NEEDLE_MAP_SCALE so that each thread owns one column of the cell.
//
//   u, v         - input flow components; rows are clamped to u.rows - 1
//   u_avg, v_avg - output, one value per block at (blockIdx.y, blockIdx.x)
//
// NOTE(review): x is not clamped, so the caller is presumably expected to size
// u_avg so that every addressed column exists in u and v - confirm at the call site.
__global__ void NeedleMapAverageKernel(const PtrStepSzf u, const PtrStepf v, PtrStepf u_avg, PtrStepf v_avg)
{
    __shared__ float smem[2 * NEEDLE_MAP_SCALE];

    float* u_col_sum = smem;
    float* v_col_sum = u_col_sum + NEEDLE_MAP_SCALE;

    const int x = blockIdx.x * NEEDLE_MAP_SCALE + threadIdx.x;
    const int y = blockIdx.y * NEEDLE_MAP_SCALE;

    // Each thread sums its own column of the cell in registers.
    float u_sum = 0.0f;
    float v_sum = 0.0f;

    #pragma unroll
    for (int i = 0; i < NEEDLE_MAP_SCALE; ++i)
    {
        const int row = ::min(y + i, u.rows - 1);
        u_sum += u(row, x);
        v_sum += v(row, x);
    }

    u_col_sum[threadIdx.x] = u_sum;
    v_col_sum[threadIdx.x] = v_sum;

    __syncthreads();

    // Tree reduction over all NEEDLE_MAP_SCALE column sums.
    // The previous code tested "X | 0xfe == 0xfe", which parses as
    // "X | (0xfe == 0xfe)" and therefore did not select the intended threads.
    // It also restricted the reduction to threads 0..7, so columns 9..15 never
    // reached the result. Explicit barriers replace the reliance on implicit
    // warp lock-step execution.
    for (int stride = NEEDLE_MAP_SCALE / 2; stride > 0; stride >>= 1)
    {
        if (threadIdx.x < stride)
        {
            u_col_sum[threadIdx.x] += u_col_sum[threadIdx.x + stride];
            v_col_sum[threadIdx.x] += v_col_sum[threadIdx.x + stride];
        }

        // Every thread of the block reaches this barrier (no early exits above).
        __syncthreads();
    }

    if (threadIdx.x == 0)
    {
        const float coeff = 1.0f / (NEEDLE_MAP_SCALE * NEEDLE_MAP_SCALE);

        u_avg(blockIdx.y, blockIdx.x) = u_col_sum[0] * coeff;
        v_avg(blockIdx.y, blockIdx.x) = v_col_sum[0] * coeff;
    }
}
|
||||
|
||||
// Host launcher for NeedleMapAverageKernel.
// Uses one block per element of u_avg and NEEDLE_MAP_SCALE threads per block,
// then waits for the device to finish. Errors are reported through cudaSafeCall.
void NeedleMapAverage_gpu(PtrStepSzf u, PtrStepSzf v, PtrStepSzf u_avg, PtrStepSzf v_avg)
{
    const dim3 threadsPerBlock(NEEDLE_MAP_SCALE);
    const dim3 blocksPerGrid(u_avg.cols, u_avg.rows);

    NeedleMapAverageKernel<<<blocksPerGrid, threadsPerBlock>>>(u, v, u_avg, v_avg);

    // First check catches launch failures, the second catches execution failures.
    cudaSafeCall( cudaGetLastError() );
    cudaSafeCall( cudaDeviceSynchronize() );
}
|
||||
|
||||
// Emits the geometry of one needle (arrow) per element of the averaged flow field.
//
// For element (y, x) the kernel writes NUM_VERTS_PER_ARROW vertices, three floats
// each, into vertex_data starting at (y * u_avg.cols + x) * NUM_VERTS_PER_ARROW * 3.
// color_data receives one value per vertex at the index of that vertex's x
// coordinate; the value is derived from the flow angle.
//
//   u_avg, v_avg   - averaged flow components
//   max_flow       - flow magnitude that maps to the maximum needle length (14)
//   xscale, yscale - factors applied to the vertex x / y coordinates on output
__global__ void NeedleMapVertexKernel(const PtrStepSzf u_avg, const PtrStepf v_avg, float* vertex_data, float* color_data, float max_flow, float xscale, float yscale)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    if (x >= u_avg.cols || y >= u_avg.rows)
        return;

    // Needle origin: centre of the NEEDLE_MAP_SCALE x NEEDLE_MAP_SCALE cell.
    const float arrow_x = x * NEEDLE_MAP_SCALE + NEEDLE_MAP_SCALE / 2.0f;
    const float arrow_y = y * NEEDLE_MAP_SCALE + NEEDLE_MAP_SCALE / 2.0f;

    const float flow_u = u_avg(y, x);
    const float flow_v = v_avg(y, x);

    const float theta = ::atan2f(flow_v, flow_u);// + CV_PI;

    // Needle length, normalised by max_flow and capped at 14.
    float len = ::sqrtf(flow_v * flow_v + flow_u * flow_u);
    len = fmin(14.0f * (len / max_flow), 14.0f);

    float3 v[NUM_VERTS_PER_ARROW];

    // Vertices 0 and 5 sit at the origin, 2 and 3 at the tip.
    v[0].x = arrow_x;
    v[0].y = arrow_y;
    v[0].z = 1.0f;

    v[2].x = arrow_x + len * ::cosf(theta);
    v[2].y = arrow_y + len * ::sinf(theta);
    v[2].z = 0.7f;

    v[3].x = v[2].x;
    v[3].y = v[2].y;
    v[3].z = 0.7f;

    v[5].x = arrow_x;
    v[5].y = arrow_y;
    v[5].z = 1.0f;

    // Vertices 1 and 4 are offset perpendicular to the flow direction by at most 2.5.
    len = ::fmin(len, 2.5f);

    v[1].x = arrow_x + len * ::cosf(theta - CV_PI_F / 2.0f);
    v[1].y = arrow_y + len * ::sinf(theta - CV_PI_F / 2.0f);
    v[1].z = 0.7f;

    v[4].x = arrow_x + len * ::cosf(theta + CV_PI_F / 2.0f);
    v[4].y = arrow_y + len * ::sinf(theta + CV_PI_F / 2.0f);
    v[4].z = 0.7f;

    const float angle_color = (theta - CV_PI_F) / CV_PI_F * 180.0f;

    int indx = (y * u_avg.cols + x) * NUM_VERTS_PER_ARROW * 3;

    for (int i = 0; i < NUM_VERTS_PER_ARROW; ++i)
    {
        color_data[indx] = angle_color;
        vertex_data[indx++] = v[i].x * xscale;
        vertex_data[indx++] = v[i].y * yscale;
        vertex_data[indx++] = v[i].z;
    }
}
|
||||
|
||||
// Host launcher for NeedleMapVertexKernel.
// Covers the u_avg grid with 16x1 thread blocks, launches the kernel on the
// default stream and blocks until it has finished.
void CreateOpticalFlowNeedleMap_gpu(PtrStepSzf u_avg, PtrStepSzf v_avg, float* vertex_buffer, float* color_data, float max_flow, float xscale, float yscale)
{
    const dim3 threadsPerBlock(16);
    const dim3 blocksPerGrid(divUp(u_avg.cols, threadsPerBlock.x), divUp(u_avg.rows, threadsPerBlock.y));

    NeedleMapVertexKernel<<<blocksPerGrid, threadsPerBlock>>>(u_avg, v_avg, vertex_buffer, color_data, max_flow, xscale, yscale);

    // First check catches launch failures, the second catches execution failures.
    cudaSafeCall( cudaGetLastError() );
    cudaSafeCall( cudaDeviceSynchronize() );
}
|
||||
}
|
||||
}}}
|
||||
|
||||
#endif /* CUDA_DISABLER */
|
647
modules/gpuoptflow/src/cuda/optical_flow_farneback.cu
Normal file
647
modules/gpuoptflow/src/cuda/optical_flow_farneback.cu
Normal file
@@ -0,0 +1,647 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/border_interpolate.hpp"
|
||||
|
||||
#define tx threadIdx.x
|
||||
#define ty threadIdx.y
|
||||
#define bx blockIdx.x
|
||||
#define by blockIdx.y
|
||||
#define bdx blockDim.x
|
||||
#define bdy blockDim.y
|
||||
|
||||
#define BORDER_SIZE 5
|
||||
#define MAX_KSIZE_HALF 100
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback
|
||||
{
|
||||
// Filter taps read by polynomialExpansion and uploaded by
// setPolynomialExpansionConsts (polyN + 1 values each). The fixed capacity of 8
// limits polyN to at most 7.
__constant__ float c_g[8];
|
||||
__constant__ float c_xg[8];
|
||||
__constant__ float c_xxg[8];
|
||||
// Scalar factors applied to the filter responses when polynomialExpansion
// writes its five output planes; uploaded by setPolynomialExpansionConsts.
__constant__ float c_ig11, c_ig03, c_ig33, c_ig55;
|
||||
|
||||
|
||||
template <int polyN>
|
||||
__global__ void polynomialExpansion(
|
||||
const int height, const int width, const PtrStepf src, PtrStepf dst)
|
||||
{
|
||||
const int y = by * bdy + ty;
|
||||
const int x = bx * (bdx - 2*polyN) + tx - polyN;
|
||||
|
||||
if (y < height)
|
||||
{
|
||||
extern __shared__ float smem[];
|
||||
volatile float *row = smem + tx;
|
||||
int xWarped = ::min(::max(x, 0), width - 1);
|
||||
|
||||
row[0] = src(y, xWarped) * c_g[0];
|
||||
row[bdx] = 0.f;
|
||||
row[2*bdx] = 0.f;
|
||||
|
||||
for (int k = 1; k <= polyN; ++k)
|
||||
{
|
||||
float t0 = src(::max(y - k, 0), xWarped);
|
||||
float t1 = src(::min(y + k, height - 1), xWarped);
|
||||
|
||||
row[0] += c_g[k] * (t0 + t1);
|
||||
row[bdx] += c_xg[k] * (t1 - t0);
|
||||
row[2*bdx] += c_xxg[k] * (t0 + t1);
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if (tx >= polyN && tx + polyN < bdx && x < width)
|
||||
{
|
||||
float b1 = c_g[0] * row[0];
|
||||
float b3 = c_g[0] * row[bdx];
|
||||
float b5 = c_g[0] * row[2*bdx];
|
||||
float b2 = 0, b4 = 0, b6 = 0;
|
||||
|
||||
for (int k = 1; k <= polyN; ++k)
|
||||
{
|
||||
b1 += (row[k] + row[-k]) * c_g[k];
|
||||
b4 += (row[k] + row[-k]) * c_xxg[k];
|
||||
b2 += (row[k] - row[-k]) * c_xg[k];
|
||||
b3 += (row[k + bdx] + row[-k + bdx]) * c_g[k];
|
||||
b6 += (row[k + bdx] - row[-k + bdx]) * c_xg[k];
|
||||
b5 += (row[k + 2*bdx] + row[-k + 2*bdx]) * c_g[k];
|
||||
}
|
||||
|
||||
dst(y, xWarped) = b3*c_ig11;
|
||||
dst(height + y, xWarped) = b2*c_ig11;
|
||||
dst(2*height + y, xWarped) = b1*c_ig03 + b5*c_ig33;
|
||||
dst(3*height + y, xWarped) = b1*c_ig03 + b4*c_ig33;
|
||||
dst(4*height + y, xWarped) = b6*c_ig55;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void setPolynomialExpansionConsts(
|
||||
int polyN, const float *g, const float *xg, const float *xxg,
|
||||
float ig11, float ig03, float ig33, float ig55)
|
||||
{
|
||||
cudaSafeCall(cudaMemcpyToSymbol(c_g, g, (polyN + 1) * sizeof(*g)));
|
||||
cudaSafeCall(cudaMemcpyToSymbol(c_xg, xg, (polyN + 1) * sizeof(*xg)));
|
||||
cudaSafeCall(cudaMemcpyToSymbol(c_xxg, xxg, (polyN + 1) * sizeof(*xxg)));
|
||||
cudaSafeCall(cudaMemcpyToSymbol(c_ig11, &ig11, sizeof(ig11)));
|
||||
cudaSafeCall(cudaMemcpyToSymbol(c_ig03, &ig03, sizeof(ig03)));
|
||||
cudaSafeCall(cudaMemcpyToSymbol(c_ig33, &ig33, sizeof(ig33)));
|
||||
cudaSafeCall(cudaMemcpyToSymbol(c_ig55, &ig55, sizeof(ig55)));
|
||||
}
|
||||
|
||||
|
||||
void polynomialExpansionGpu(const PtrStepSzf &src, int polyN, PtrStepSzf dst, cudaStream_t stream)
|
||||
{
|
||||
dim3 block(256);
|
||||
dim3 grid(divUp(src.cols, block.x - 2*polyN), src.rows);
|
||||
int smem = 3 * block.x * sizeof(float);
|
||||
|
||||
if (polyN == 5)
|
||||
polynomialExpansion<5><<<grid, block, smem, stream>>>(src.rows, src.cols, src, dst);
|
||||
else if (polyN == 7)
|
||||
polynomialExpansion<7><<<grid, block, smem, stream>>>(src.rows, src.cols, src, dst);
|
||||
|
||||
cudaSafeCall(cudaGetLastError());
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall(cudaDeviceSynchronize());
|
||||
}
|
||||
|
||||
|
||||
// Per-distance attenuation factors read by updateMatrices, indexed by the pixel's
// distance to an image edge clamped to BORDER_SIZE. Filled by setUpdateMatricesConsts.
__constant__ float c_border[BORDER_SIZE + 1];
|
||||
|
||||
// Builds the five-plane matrix M from the current flow and the coefficient
// planes R0 and R1.
//
// For each pixel, R1 is sampled bilinearly at the position displaced by the flow
// and combined with R0 at the pixel itself. When the displaced position falls
// outside the interior of the image, only R0 is used. The combined values are
// attenuated near the image borders via c_border before M is written.
//
// R0, R1 and M each hold five planes stacked vertically, `height` rows per plane.
__global__ void updateMatrices(
        const int height, const int width, const PtrStepf flowx, const PtrStepf flowy,
        const PtrStepf R0, const PtrStepf R1, PtrStepf M)
{
    const int y = by * bdy + ty;
    const int x = bx * bdx + tx;

    if (y >= height || x >= width)
        return;

    const float dx = flowx(y, x);
    const float dy = flowy(y, x);

    // Displaced position, split into integer cell and fractional offset.
    const float px = x + dx;
    const float py = y + dy;

    const int ix = floorf(px);
    const int iy = floorf(py);

    const float wx = px - ix;
    const float wy = py - iy;

    float r2, r3, r4, r5, r6;

    const bool insideInterior = ix >= 0 && iy >= 0 && ix < width - 1 && iy < height - 1;

    if (insideInterior)
    {
        // Bilinear weights of the four neighbouring samples.
        const float a00 = (1.f - wx) * (1.f - wy);
        const float a01 = wx * (1.f - wy);
        const float a10 = (1.f - wx) * wy;
        const float a11 = wx * wy;

        r2 = a00 * R1(iy, ix) +
             a01 * R1(iy, ix + 1) +
             a10 * R1(iy + 1, ix) +
             a11 * R1(iy + 1, ix + 1);

        r3 = a00 * R1(height + iy, ix) +
             a01 * R1(height + iy, ix + 1) +
             a10 * R1(height + iy + 1, ix) +
             a11 * R1(height + iy + 1, ix + 1);

        r4 = a00 * R1(2*height + iy, ix) +
             a01 * R1(2*height + iy, ix + 1) +
             a10 * R1(2*height + iy + 1, ix) +
             a11 * R1(2*height + iy + 1, ix + 1);

        r5 = a00 * R1(3*height + iy, ix) +
             a01 * R1(3*height + iy, ix + 1) +
             a10 * R1(3*height + iy + 1, ix) +
             a11 * R1(3*height + iy + 1, ix + 1);

        r6 = a00 * R1(4*height + iy, ix) +
             a01 * R1(4*height + iy, ix + 1) +
             a10 * R1(4*height + iy + 1, ix) +
             a11 * R1(4*height + iy + 1, ix + 1);

        r4 = (R0(2*height + y, x) + r4) * 0.5f;
        r5 = (R0(3*height + y, x) + r5) * 0.5f;
        r6 = (R0(4*height + y, x) + r6) * 0.25f;
    }
    else
    {
        // No valid sample in R1: fall back to R0 alone.
        r2 = r3 = 0.f;
        r4 = R0(2*height + y, x);
        r5 = R0(3*height + y, x);
        r6 = R0(4*height + y, x) * 0.5f;
    }

    r2 = (R0(y, x) - r2) * 0.5f;
    r3 = (R0(height + y, x) - r3) * 0.5f;

    r2 += r4*dy + r6*dx;
    r3 += r6*dy + r5*dx;

    // Attenuation by distance to each of the four image edges.
    const float scale =
        c_border[::min(x, BORDER_SIZE)] *
        c_border[::min(y, BORDER_SIZE)] *
        c_border[::min(width - x - 1, BORDER_SIZE)] *
        c_border[::min(height - y - 1, BORDER_SIZE)];

    r2 *= scale;
    r3 *= scale;
    r4 *= scale;
    r5 *= scale;
    r6 *= scale;

    M(y, x) = r4*r4 + r6*r6;
    M(height + y, x) = (r4 + r5)*r6;
    M(2*height + y, x) = r5*r5 + r6*r6;
    M(3*height + y, x) = r4*r2 + r6*r3;
    M(4*height + y, x) = r6*r2 + r5*r3;
}
|
||||
|
||||
|
||||
void setUpdateMatricesConsts()
|
||||
{
|
||||
static const float border[BORDER_SIZE + 1] = {0.14f, 0.14f, 0.4472f, 0.4472f, 0.4472f, 1.f};
|
||||
cudaSafeCall(cudaMemcpyToSymbol(c_border, border, (BORDER_SIZE + 1) * sizeof(*border)));
|
||||
}
|
||||
|
||||
|
||||
void updateMatricesGpu(
|
||||
const PtrStepSzf flowx, const PtrStepSzf flowy, const PtrStepSzf R0, const PtrStepSzf R1,
|
||||
PtrStepSzf M, cudaStream_t stream)
|
||||
{
|
||||
dim3 block(32, 8);
|
||||
dim3 grid(divUp(flowx.cols, block.x), divUp(flowx.rows, block.y));
|
||||
|
||||
updateMatrices<<<grid, block, 0, stream>>>(flowx.rows, flowx.cols, flowx, flowy, R0, R1, M);
|
||||
|
||||
cudaSafeCall(cudaGetLastError());
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall(cudaDeviceSynchronize());
|
||||
}
|
||||
|
||||
|
||||
// Solves a regularised 2x2 linear system per pixel and stores the solution as the
// new flow.
//
// M holds five planes stacked vertically (`height` rows each): g11, g12, g22, h1, h2.
// The determinant is offset by 1e-3 before inversion, which keeps the division
// finite when the system is singular.
__global__ void updateFlow(
        const int height, const int width, const PtrStepf M, PtrStepf flowx, PtrStepf flowy)
{
    const int y = by * bdy + ty;
    const int x = bx * bdx + tx;

    if (y >= height || x >= width)
        return;

    const float g11 = M(y, x);
    const float g12 = M(height + y, x);
    const float g22 = M(2*height + y, x);
    const float h1 = M(3*height + y, x);
    const float h2 = M(4*height + y, x);

    const float detInv = 1.f / (g11*g22 - g12*g12 + 1e-3f);

    flowx(y, x) = (g11*h2 - g12*h1) * detInv;
    flowy(y, x) = (g22*h1 - g12*h2) * detInv;
}
|
||||
|
||||
|
||||
void updateFlowGpu(const PtrStepSzf M, PtrStepSzf flowx, PtrStepSzf flowy, cudaStream_t stream)
|
||||
{
|
||||
dim3 block(32, 8);
|
||||
dim3 grid(divUp(flowx.cols, block.x), divUp(flowx.rows, block.y));
|
||||
|
||||
updateFlow<<<grid, block, 0, stream>>>(flowx.rows, flowx.cols, M, flowx, flowy);
|
||||
|
||||
cudaSafeCall(cudaGetLastError());
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall(cudaDeviceSynchronize());
|
||||
}
|
||||
|
||||
|
||||
/*__global__ void boxFilter(
|
||||
const int height, const int width, const PtrStepf src,
|
||||
const int ksizeHalf, const float boxAreaInv, PtrStepf dst)
|
||||
{
|
||||
const int y = by * bdy + ty;
|
||||
const int x = bx * bdx + tx;
|
||||
|
||||
extern __shared__ float smem[];
|
||||
volatile float *row = smem + ty * (bdx + 2*ksizeHalf);
|
||||
|
||||
if (y < height)
|
||||
{
|
||||
// Vertical pass
|
||||
for (int i = tx; i < bdx + 2*ksizeHalf; i += bdx)
|
||||
{
|
||||
int xExt = int(bx * bdx) + i - ksizeHalf;
|
||||
xExt = ::min(::max(xExt, 0), width - 1);
|
||||
|
||||
row[i] = src(y, xExt);
|
||||
for (int j = 1; j <= ksizeHalf; ++j)
|
||||
row[i] += src(::max(y - j, 0), xExt) + src(::min(y + j, height - 1), xExt);
|
||||
}
|
||||
|
||||
if (x < width)
|
||||
{
|
||||
__syncthreads();
|
||||
|
||||
// Horizontal pass
|
||||
row += tx + ksizeHalf;
|
||||
float res = row[0];
|
||||
for (int i = 1; i <= ksizeHalf; ++i)
|
||||
res += row[-i] + row[i];
|
||||
dst(y, x) = res * boxAreaInv;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void boxFilterGpu(const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream)
|
||||
{
|
||||
dim3 block(256);
|
||||
dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
|
||||
int smem = (block.x + 2*ksizeHalf) * block.y * sizeof(float);
|
||||
|
||||
float boxAreaInv = 1.f / ((1 + 2*ksizeHalf) * (1 + 2*ksizeHalf));
|
||||
boxFilter<<<grid, block, smem, stream>>>(src.rows, src.cols, src, ksizeHalf, boxAreaInv, dst);
|
||||
|
||||
cudaSafeCall(cudaGetLastError());
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall(cudaDeviceSynchronize());
|
||||
}*/
|
||||
|
||||
|
||||
// Box filter of radius ksizeHalf applied independently to five planes that are
// stacked vertically in src (and dst), `height` rows per plane.
//
// Step 1 (vertical): every thread sums columns of the block's tile, including a
// halo of ksizeHalf columns on each side, into shared memory. Out-of-range rows
// and columns replicate the nearest border pixel.
// Step 2 (horizontal): every thread with x < width sums its 2 * ksizeHalf + 1
// neighbouring column sums and writes the result scaled by boxAreaInv.
//
// Requires 5 * blockDim.y * (blockDim.x + 2 * ksizeHalf) floats of dynamic
// shared memory.
__global__ void boxFilter5(
        const int height, const int width, const PtrStepf src,
        const int ksizeHalf, const float boxAreaInv, PtrStepf dst)
{
    const int y = by * bdy + ty;
    const int x = bx * bdx + tx;

    extern __shared__ float smem[];

    const int smw = bdx + 2*ksizeHalf; // shared memory "width"
    volatile float *row = smem + 5 * ty * smw;

    if (y < height)
    {
        // Vertical pass
        for (int i = tx; i < bdx + 2*ksizeHalf; i += bdx)
        {
            int xExt = int(bx * bdx) + i - ksizeHalf;
            xExt = ::min(::max(xExt, 0), width - 1);

            #pragma unroll
            for (int k = 0; k < 5; ++k)
                row[k*smw + i] = src(k*height + y, xExt);

            for (int j = 1; j <= ksizeHalf; ++j)
                #pragma unroll
                for (int k = 0; k < 5; ++k)
                    row[k*smw + i] +=
                            src(k*height + ::max(y - j, 0), xExt) +
                            src(k*height + ::min(y + j, height - 1), xExt);
        }
    }

    // The barrier must be executed by every thread of the block. It used to sit
    // inside "if (x < width)", which diverges in the last block of each row
    // whenever width is not a multiple of blockDim.x.
    __syncthreads();

    if (y < height && x < width)
    {
        // Horizontal pass
        row += tx + ksizeHalf;
        float res[5];

        #pragma unroll
        for (int k = 0; k < 5; ++k)
            res[k] = row[k*smw];

        for (int i = 1; i <= ksizeHalf; ++i)
            #pragma unroll
            for (int k = 0; k < 5; ++k)
                res[k] += row[k*smw - i] + row[k*smw + i];

        #pragma unroll
        for (int k = 0; k < 5; ++k)
            dst(k*height + y, x) = res[k] * boxAreaInv;
    }
}
|
||||
|
||||
|
||||
void boxFilter5Gpu(const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream)
|
||||
{
|
||||
int height = src.rows / 5;
|
||||
int width = src.cols;
|
||||
|
||||
dim3 block(256);
|
||||
dim3 grid(divUp(width, block.x), divUp(height, block.y));
|
||||
int smem = (block.x + 2*ksizeHalf) * 5 * block.y * sizeof(float);
|
||||
|
||||
float boxAreaInv = 1.f / ((1 + 2*ksizeHalf) * (1 + 2*ksizeHalf));
|
||||
boxFilter5<<<grid, block, smem, stream>>>(height, width, src, ksizeHalf, boxAreaInv, dst);
|
||||
|
||||
cudaSafeCall(cudaGetLastError());
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall(cudaDeviceSynchronize());
|
||||
}
|
||||
|
||||
|
||||
void boxFilter5Gpu_CC11(const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream)
|
||||
{
|
||||
int height = src.rows / 5;
|
||||
int width = src.cols;
|
||||
|
||||
dim3 block(128);
|
||||
dim3 grid(divUp(width, block.x), divUp(height, block.y));
|
||||
int smem = (block.x + 2*ksizeHalf) * 5 * block.y * sizeof(float);
|
||||
|
||||
float boxAreaInv = 1.f / ((1 + 2*ksizeHalf) * (1 + 2*ksizeHalf));
|
||||
boxFilter5<<<grid, block, smem, stream>>>(height, width, src, ksizeHalf, boxAreaInv, dst);
|
||||
|
||||
cudaSafeCall(cudaGetLastError());
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall(cudaDeviceSynchronize());
|
||||
}
|
||||
|
||||
|
||||
// One-sided blur kernel (centre tap at index 0) read by gaussianBlur and
// gaussianBlur5; uploaded by setGaussianBlurKernel. Holds at most
// MAX_KSIZE_HALF + 1 taps.
__constant__ float c_gKer[MAX_KSIZE_HALF + 1];
|
||||
|
||||
template <typename Border>
|
||||
__global__ void gaussianBlur(
|
||||
const int height, const int width, const PtrStepf src, const int ksizeHalf,
|
||||
const Border b, PtrStepf dst)
|
||||
{
|
||||
const int y = by * bdy + ty;
|
||||
const int x = bx * bdx + tx;
|
||||
|
||||
extern __shared__ float smem[];
|
||||
volatile float *row = smem + ty * (bdx + 2*ksizeHalf);
|
||||
|
||||
if (y < height)
|
||||
{
|
||||
// Vertical pass
|
||||
for (int i = tx; i < bdx + 2*ksizeHalf; i += bdx)
|
||||
{
|
||||
int xExt = int(bx * bdx) + i - ksizeHalf;
|
||||
xExt = b.idx_col(xExt);
|
||||
row[i] = src(y, xExt) * c_gKer[0];
|
||||
for (int j = 1; j <= ksizeHalf; ++j)
|
||||
row[i] +=
|
||||
(src(b.idx_row_low(y - j), xExt) +
|
||||
src(b.idx_row_high(y + j), xExt)) * c_gKer[j];
|
||||
}
|
||||
|
||||
if (x < width)
|
||||
{
|
||||
__syncthreads();
|
||||
|
||||
// Horizontal pass
|
||||
row += tx + ksizeHalf;
|
||||
float res = row[0] * c_gKer[0];
|
||||
for (int i = 1; i <= ksizeHalf; ++i)
|
||||
res += (row[-i] + row[i]) * c_gKer[i];
|
||||
dst(y, x) = res;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void setGaussianBlurKernel(const float *gKer, int ksizeHalf)
|
||||
{
|
||||
cudaSafeCall(cudaMemcpyToSymbol(c_gKer, gKer, (ksizeHalf + 1) * sizeof(*gKer)));
|
||||
}
|
||||
|
||||
|
||||
template <typename Border>
|
||||
void gaussianBlurCaller(const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream)
|
||||
{
|
||||
int height = src.rows;
|
||||
int width = src.cols;
|
||||
|
||||
dim3 block(256);
|
||||
dim3 grid(divUp(width, block.x), divUp(height, block.y));
|
||||
int smem = (block.x + 2*ksizeHalf) * block.y * sizeof(float);
|
||||
Border b(height, width);
|
||||
|
||||
gaussianBlur<<<grid, block, smem, stream>>>(height, width, src, ksizeHalf, b, dst);
|
||||
|
||||
cudaSafeCall(cudaGetLastError());
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall(cudaDeviceSynchronize());
|
||||
}
|
||||
|
||||
|
||||
void gaussianBlurGpu(
|
||||
const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, int borderMode, cudaStream_t stream)
|
||||
{
|
||||
typedef void (*caller_t)(const PtrStepSzf, int, PtrStepSzf, cudaStream_t);
|
||||
|
||||
static const caller_t callers[] =
|
||||
{
|
||||
gaussianBlurCaller<BrdReflect101<float> >,
|
||||
gaussianBlurCaller<BrdReplicate<float> >,
|
||||
};
|
||||
|
||||
callers[borderMode](src, ksizeHalf, dst, stream);
|
||||
}
|
||||
|
||||
|
||||
template <typename Border>
|
||||
__global__ void gaussianBlur5(
|
||||
const int height, const int width, const PtrStepf src, const int ksizeHalf,
|
||||
const Border b, PtrStepf dst)
|
||||
{
|
||||
const int y = by * bdy + ty;
|
||||
const int x = bx * bdx + tx;
|
||||
|
||||
extern __shared__ float smem[];
|
||||
|
||||
const int smw = bdx + 2*ksizeHalf; // shared memory "width"
|
||||
volatile float *row = smem + 5 * ty * smw;
|
||||
|
||||
if (y < height)
|
||||
{
|
||||
// Vertical pass
|
||||
for (int i = tx; i < bdx + 2*ksizeHalf; i += bdx)
|
||||
{
|
||||
int xExt = int(bx * bdx) + i - ksizeHalf;
|
||||
xExt = b.idx_col(xExt);
|
||||
|
||||
#pragma unroll
|
||||
for (int k = 0; k < 5; ++k)
|
||||
row[k*smw + i] = src(k*height + y, xExt) * c_gKer[0];
|
||||
|
||||
for (int j = 1; j <= ksizeHalf; ++j)
|
||||
#pragma unroll
|
||||
for (int k = 0; k < 5; ++k)
|
||||
row[k*smw + i] +=
|
||||
(src(k*height + b.idx_row_low(y - j), xExt) +
|
||||
src(k*height + b.idx_row_high(y + j), xExt)) * c_gKer[j];
|
||||
}
|
||||
|
||||
if (x < width)
|
||||
{
|
||||
__syncthreads();
|
||||
|
||||
// Horizontal pass
|
||||
|
||||
row += tx + ksizeHalf;
|
||||
float res[5];
|
||||
|
||||
#pragma unroll
|
||||
for (int k = 0; k < 5; ++k)
|
||||
res[k] = row[k*smw] * c_gKer[0];
|
||||
|
||||
for (int i = 1; i <= ksizeHalf; ++i)
|
||||
#pragma unroll
|
||||
for (int k = 0; k < 5; ++k)
|
||||
res[k] += (row[k*smw - i] + row[k*smw + i]) * c_gKer[i];
|
||||
|
||||
#pragma unroll
|
||||
for (int k = 0; k < 5; ++k)
|
||||
dst(k*height + y, x) = res[k];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename Border, int blockDimX>
|
||||
void gaussianBlur5Caller(
|
||||
const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream)
|
||||
{
|
||||
int height = src.rows / 5;
|
||||
int width = src.cols;
|
||||
|
||||
dim3 block(blockDimX);
|
||||
dim3 grid(divUp(width, block.x), divUp(height, block.y));
|
||||
int smem = (block.x + 2*ksizeHalf) * 5 * block.y * sizeof(float);
|
||||
Border b(height, width);
|
||||
|
||||
gaussianBlur5<<<grid, block, smem, stream>>>(height, width, src, ksizeHalf, b, dst);
|
||||
|
||||
cudaSafeCall(cudaGetLastError());
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall(cudaDeviceSynchronize());
|
||||
}
|
||||
|
||||
|
||||
void gaussianBlur5Gpu(
|
||||
const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, int borderMode, cudaStream_t stream)
|
||||
{
|
||||
typedef void (*caller_t)(const PtrStepSzf, int, PtrStepSzf, cudaStream_t);
|
||||
|
||||
static const caller_t callers[] =
|
||||
{
|
||||
gaussianBlur5Caller<BrdReflect101<float>,256>,
|
||||
gaussianBlur5Caller<BrdReplicate<float>,256>,
|
||||
};
|
||||
|
||||
callers[borderMode](src, ksizeHalf, dst, stream);
|
||||
}
|
||||
|
||||
void gaussianBlur5Gpu_CC11(
|
||||
const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, int borderMode, cudaStream_t stream)
|
||||
{
|
||||
typedef void (*caller_t)(const PtrStepSzf, int, PtrStepSzf, cudaStream_t);
|
||||
|
||||
static const caller_t callers[] =
|
||||
{
|
||||
gaussianBlur5Caller<BrdReflect101<float>,128>,
|
||||
gaussianBlur5Caller<BrdReplicate<float>,128>,
|
||||
};
|
||||
|
||||
callers[borderMode](src, ksizeHalf, dst, stream);
|
||||
}
|
||||
|
||||
}}}} // namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback
|
||||
|
||||
|
||||
#endif /* CUDA_DISABLER */
|
560
modules/gpuoptflow/src/cuda/pyrlk.cu
Normal file
560
modules/gpuoptflow/src/cuda/pyrlk.cu
Normal file
@@ -0,0 +1,560 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/utility.hpp"
|
||||
#include "opencv2/core/cuda/functional.hpp"
|
||||
#include "opencv2/core/cuda/limits.hpp"
|
||||
#include "opencv2/core/cuda/vec_math.hpp"
|
||||
#include "opencv2/core/cuda/reduce.hpp"
|
||||
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::cudev;
|
||||
|
||||
namespace pyrlk
|
||||
{
|
||||
// Tracking window parameters in constant memory. sparseKernel uses c_winSize_y
// as a loop bound over patch rows and subtracts c_halfWin_x / c_halfWin_y from
// the point position.
// NOTE(review): the code that uploads these values is not visible in this part
// of the file.
__constant__ int c_winSize_x;
|
||||
__constant__ int c_winSize_y;
|
||||
__constant__ int c_halfWin_x;
|
||||
__constant__ int c_halfWin_y;
|
||||
// NOTE(review): presumably the iteration limit of the tracker - confirm where it is read.
__constant__ int c_iters;
|
||||
|
||||
// Texture references for image I, read through Tex_I<cn>::read. All are declared
// with unnormalised coordinates and clamp addressing; the float variants use
// linear filtering, tex_Ib uses point sampling.
texture<float, cudaTextureType2D, cudaReadModeElementType> tex_If(false, cudaFilterModeLinear, cudaAddressModeClamp);
|
||||
texture<float4, cudaTextureType2D, cudaReadModeElementType> tex_If4(false, cudaFilterModeLinear, cudaAddressModeClamp);
|
||||
texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_Ib(false, cudaFilterModePoint, cudaAddressModeClamp);
|
||||
|
||||
// Texture references for image J, read through Tex_J<cn>::read; same sampling
// settings as the float variants above.
texture<float, cudaTextureType2D, cudaReadModeElementType> tex_Jf(false, cudaFilterModeLinear, cudaAddressModeClamp);
|
||||
texture<float4, cudaTextureType2D, cudaReadModeElementType> tex_Jf4(false, cudaFilterModeLinear, cudaAddressModeClamp);
|
||||
|
||||
template <int cn> struct Tex_I;
|
||||
template <> struct Tex_I<1>
|
||||
{
|
||||
static __device__ __forceinline__ float read(float x, float y)
|
||||
{
|
||||
return tex2D(tex_If, x, y);
|
||||
}
|
||||
};
|
||||
template <> struct Tex_I<4>
|
||||
{
|
||||
static __device__ __forceinline__ float4 read(float x, float y)
|
||||
{
|
||||
return tex2D(tex_If4, x, y);
|
||||
}
|
||||
};
|
||||
|
||||
template <int cn> struct Tex_J;
|
||||
template <> struct Tex_J<1>
|
||||
{
|
||||
static __device__ __forceinline__ float read(float x, float y)
|
||||
{
|
||||
return tex2D(tex_Jf, x, y);
|
||||
}
|
||||
};
|
||||
template <> struct Tex_J<4>
|
||||
{
|
||||
static __device__ __forceinline__ float4 read(float x, float y)
|
||||
{
|
||||
return tex2D(tex_Jf4, x, y);
|
||||
}
|
||||
};
|
||||
|
||||
// Adds a scalar contribution to a running sum.
__device__ __forceinline__ void accum(float& dst, float val)
{
    dst = dst + val;
}

// Adds the x, y and z components of a 4-channel value to a running sum.
// The w component is deliberately left out of the sum.
__device__ __forceinline__ void accum(float& dst, const float4& val)
{
    const float xyz = val.x + val.y + val.z;
    dst += xyz;
}
|
||||
|
||||
// Absolute value of a scalar.
__device__ __forceinline__ float abs_(float a)
{
    const float magnitude = ::fabsf(a);
    return magnitude;
}

// Absolute value of a 4-channel value, delegated to the float4 overload of abs().
__device__ __forceinline__ float4 abs_(const float4& a)
{
    const float4 magnitude = abs(a);
    return magnitude;
}
|
||||
|
||||
template <int cn, int PATCH_X, int PATCH_Y, bool calcErr>
|
||||
__global__ void sparseKernel(const float2* prevPts, float2* nextPts, uchar* status, float* err, const int level, const int rows, const int cols)
|
||||
{
|
||||
#if __CUDA_ARCH__ <= 110
|
||||
const int BLOCK_SIZE = 128;
|
||||
#else
|
||||
const int BLOCK_SIZE = 256;
|
||||
#endif
|
||||
|
||||
__shared__ float smem1[BLOCK_SIZE];
|
||||
__shared__ float smem2[BLOCK_SIZE];
|
||||
__shared__ float smem3[BLOCK_SIZE];
|
||||
|
||||
const unsigned int tid = threadIdx.y * blockDim.x + threadIdx.x;
|
||||
|
||||
float2 prevPt = prevPts[blockIdx.x];
|
||||
prevPt.x *= (1.0f / (1 << level));
|
||||
prevPt.y *= (1.0f / (1 << level));
|
||||
|
||||
if (prevPt.x < 0 || prevPt.x >= cols || prevPt.y < 0 || prevPt.y >= rows)
|
||||
{
|
||||
if (tid == 0 && level == 0)
|
||||
status[blockIdx.x] = 0;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
prevPt.x -= c_halfWin_x;
|
||||
prevPt.y -= c_halfWin_y;
|
||||
|
||||
// extract the patch from the first image, compute covariation matrix of derivatives
|
||||
|
||||
float A11 = 0;
|
||||
float A12 = 0;
|
||||
float A22 = 0;
|
||||
|
||||
typedef typename TypeVec<float, cn>::vec_type work_type;
|
||||
|
||||
work_type I_patch [PATCH_Y][PATCH_X];
|
||||
work_type dIdx_patch[PATCH_Y][PATCH_X];
|
||||
work_type dIdy_patch[PATCH_Y][PATCH_X];
|
||||
|
||||
for (int yBase = threadIdx.y, i = 0; yBase < c_winSize_y; yBase += blockDim.y, ++i)
|
||||
{
|
||||
for (int xBase = threadIdx.x, j = 0; xBase < c_winSize_x; xBase += blockDim.x, ++j)
|
||||
{
|
||||
float x = prevPt.x + xBase + 0.5f;
|
||||
float y = prevPt.y + yBase + 0.5f;
|
||||
|
||||
I_patch[i][j] = Tex_I<cn>::read(x, y);
|
||||
|
||||
// Sharr Deriv
|
||||
|
||||
work_type dIdx = 3.0f * Tex_I<cn>::read(x+1, y-1) + 10.0f * Tex_I<cn>::read(x+1, y) + 3.0f * Tex_I<cn>::read(x+1, y+1) -
|
||||
(3.0f * Tex_I<cn>::read(x-1, y-1) + 10.0f * Tex_I<cn>::read(x-1, y) + 3.0f * Tex_I<cn>::read(x-1, y+1));
|
||||
|
||||
work_type dIdy = 3.0f * Tex_I<cn>::read(x-1, y+1) + 10.0f * Tex_I<cn>::read(x, y+1) + 3.0f * Tex_I<cn>::read(x+1, y+1) -
|
||||
(3.0f * Tex_I<cn>::read(x-1, y-1) + 10.0f * Tex_I<cn>::read(x, y-1) + 3.0f * Tex_I<cn>::read(x+1, y-1));
|
||||
|
||||
dIdx_patch[i][j] = dIdx;
|
||||
dIdy_patch[i][j] = dIdy;
|
||||
|
||||
accum(A11, dIdx * dIdx);
|
||||
accum(A12, dIdx * dIdy);
|
||||
accum(A22, dIdy * dIdy);
|
||||
}
|
||||
}
|
||||
|
||||
reduce<BLOCK_SIZE>(smem_tuple(smem1, smem2, smem3), thrust::tie(A11, A12, A22), tid, thrust::make_tuple(plus<float>(), plus<float>(), plus<float>()));
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
if (tid == 0)
|
||||
{
|
||||
smem1[0] = A11;
|
||||
smem2[0] = A12;
|
||||
smem3[0] = A22;
|
||||
}
|
||||
#endif
|
||||
|
||||
__syncthreads();
|
||||
|
||||
A11 = smem1[0];
|
||||
A12 = smem2[0];
|
||||
A22 = smem3[0];
|
||||
|
||||
float D = A11 * A22 - A12 * A12;
|
||||
|
||||
if (D < numeric_limits<float>::epsilon())
|
||||
{
|
||||
if (tid == 0 && level == 0)
|
||||
status[blockIdx.x] = 0;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
D = 1.f / D;
|
||||
|
||||
A11 *= D;
|
||||
A12 *= D;
|
||||
A22 *= D;
|
||||
|
||||
float2 nextPt = nextPts[blockIdx.x];
|
||||
nextPt.x *= 2.f;
|
||||
nextPt.y *= 2.f;
|
||||
|
||||
nextPt.x -= c_halfWin_x;
|
||||
nextPt.y -= c_halfWin_y;
|
||||
|
||||
for (int k = 0; k < c_iters; ++k)
|
||||
{
|
||||
if (nextPt.x < -c_halfWin_x || nextPt.x >= cols || nextPt.y < -c_halfWin_y || nextPt.y >= rows)
|
||||
{
|
||||
if (tid == 0 && level == 0)
|
||||
status[blockIdx.x] = 0;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
float b1 = 0;
|
||||
float b2 = 0;
|
||||
|
||||
for (int y = threadIdx.y, i = 0; y < c_winSize_y; y += blockDim.y, ++i)
|
||||
{
|
||||
for (int x = threadIdx.x, j = 0; x < c_winSize_x; x += blockDim.x, ++j)
|
||||
{
|
||||
work_type I_val = I_patch[i][j];
|
||||
work_type J_val = Tex_J<cn>::read(nextPt.x + x + 0.5f, nextPt.y + y + 0.5f);
|
||||
|
||||
work_type diff = (J_val - I_val) * 32.0f;
|
||||
|
||||
accum(b1, diff * dIdx_patch[i][j]);
|
||||
accum(b2, diff * dIdy_patch[i][j]);
|
||||
}
|
||||
}
|
||||
|
||||
reduce<BLOCK_SIZE>(smem_tuple(smem1, smem2), thrust::tie(b1, b2), tid, thrust::make_tuple(plus<float>(), plus<float>()));
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
if (tid == 0)
|
||||
{
|
||||
smem1[0] = b1;
|
||||
smem2[0] = b2;
|
||||
}
|
||||
#endif
|
||||
|
||||
__syncthreads();
|
||||
|
||||
b1 = smem1[0];
|
||||
b2 = smem2[0];
|
||||
|
||||
float2 delta;
|
||||
delta.x = A12 * b2 - A22 * b1;
|
||||
delta.y = A12 * b1 - A11 * b2;
|
||||
|
||||
nextPt.x += delta.x;
|
||||
nextPt.y += delta.y;
|
||||
|
||||
if (::fabs(delta.x) < 0.01f && ::fabs(delta.y) < 0.01f)
|
||||
break;
|
||||
}
|
||||
|
||||
float errval = 0;
|
||||
if (calcErr)
|
||||
{
|
||||
for (int y = threadIdx.y, i = 0; y < c_winSize_y; y += blockDim.y, ++i)
|
||||
{
|
||||
for (int x = threadIdx.x, j = 0; x < c_winSize_x; x += blockDim.x, ++j)
|
||||
{
|
||||
work_type I_val = I_patch[i][j];
|
||||
work_type J_val = Tex_J<cn>::read(nextPt.x + x + 0.5f, nextPt.y + y + 0.5f);
|
||||
|
||||
work_type diff = J_val - I_val;
|
||||
|
||||
accum(errval, abs_(diff));
|
||||
}
|
||||
}
|
||||
|
||||
reduce<BLOCK_SIZE>(smem1, errval, tid, plus<float>());
|
||||
}
|
||||
|
||||
if (tid == 0)
|
||||
{
|
||||
nextPt.x += c_halfWin_x;
|
||||
nextPt.y += c_halfWin_y;
|
||||
|
||||
nextPts[blockIdx.x] = nextPt;
|
||||
|
||||
if (calcErr)
|
||||
err[blockIdx.x] = static_cast<float>(errval) / (cn * c_winSize_x * c_winSize_y);
|
||||
}
|
||||
}
|
||||
|
||||
template <int cn, int PATCH_X, int PATCH_Y>
|
||||
void sparse_caller(int rows, int cols, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
|
||||
int level, dim3 block, cudaStream_t stream)
|
||||
{
|
||||
dim3 grid(ptcount);
|
||||
|
||||
if (level == 0 && err)
|
||||
sparseKernel<cn, PATCH_X, PATCH_Y, true><<<grid, block>>>(prevPts, nextPts, status, err, level, rows, cols);
|
||||
else
|
||||
sparseKernel<cn, PATCH_X, PATCH_Y, false><<<grid, block>>>(prevPts, nextPts, status, err, level, rows, cols);
|
||||
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
template <bool calcErr>
|
||||
__global__ void denseKernel(PtrStepf u, PtrStepf v, const PtrStepf prevU, const PtrStepf prevV, PtrStepf err, const int rows, const int cols)
|
||||
{
|
||||
extern __shared__ int smem[];
|
||||
|
||||
const int patchWidth = blockDim.x + 2 * c_halfWin_x;
|
||||
const int patchHeight = blockDim.y + 2 * c_halfWin_y;
|
||||
|
||||
int* I_patch = smem;
|
||||
int* dIdx_patch = I_patch + patchWidth * patchHeight;
|
||||
int* dIdy_patch = dIdx_patch + patchWidth * patchHeight;
|
||||
|
||||
const int xBase = blockIdx.x * blockDim.x;
|
||||
const int yBase = blockIdx.y * blockDim.y;
|
||||
|
||||
for (int i = threadIdx.y; i < patchHeight; i += blockDim.y)
|
||||
{
|
||||
for (int j = threadIdx.x; j < patchWidth; j += blockDim.x)
|
||||
{
|
||||
float x = xBase - c_halfWin_x + j + 0.5f;
|
||||
float y = yBase - c_halfWin_y + i + 0.5f;
|
||||
|
||||
I_patch[i * patchWidth + j] = tex2D(tex_Ib, x, y);
|
||||
|
||||
// Sharr Deriv
|
||||
|
||||
dIdx_patch[i * patchWidth + j] = 3 * tex2D(tex_Ib, x+1, y-1) + 10 * tex2D(tex_Ib, x+1, y) + 3 * tex2D(tex_Ib, x+1, y+1) -
|
||||
(3 * tex2D(tex_Ib, x-1, y-1) + 10 * tex2D(tex_Ib, x-1, y) + 3 * tex2D(tex_Ib, x-1, y+1));
|
||||
|
||||
dIdy_patch[i * patchWidth + j] = 3 * tex2D(tex_Ib, x-1, y+1) + 10 * tex2D(tex_Ib, x, y+1) + 3 * tex2D(tex_Ib, x+1, y+1) -
|
||||
(3 * tex2D(tex_Ib, x-1, y-1) + 10 * tex2D(tex_Ib, x, y-1) + 3 * tex2D(tex_Ib, x+1, y-1));
|
||||
}
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
const int x = xBase + threadIdx.x;
|
||||
const int y = yBase + threadIdx.y;
|
||||
|
||||
if (x >= cols || y >= rows)
|
||||
return;
|
||||
|
||||
int A11i = 0;
|
||||
int A12i = 0;
|
||||
int A22i = 0;
|
||||
|
||||
for (int i = 0; i < c_winSize_y; ++i)
|
||||
{
|
||||
for (int j = 0; j < c_winSize_x; ++j)
|
||||
{
|
||||
int dIdx = dIdx_patch[(threadIdx.y + i) * patchWidth + (threadIdx.x + j)];
|
||||
int dIdy = dIdy_patch[(threadIdx.y + i) * patchWidth + (threadIdx.x + j)];
|
||||
|
||||
A11i += dIdx * dIdx;
|
||||
A12i += dIdx * dIdy;
|
||||
A22i += dIdy * dIdy;
|
||||
}
|
||||
}
|
||||
|
||||
float A11 = A11i;
|
||||
float A12 = A12i;
|
||||
float A22 = A22i;
|
||||
|
||||
float D = A11 * A22 - A12 * A12;
|
||||
|
||||
if (D < numeric_limits<float>::epsilon())
|
||||
{
|
||||
if (calcErr)
|
||||
err(y, x) = numeric_limits<float>::max();
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
D = 1.f / D;
|
||||
|
||||
A11 *= D;
|
||||
A12 *= D;
|
||||
A22 *= D;
|
||||
|
||||
float2 nextPt;
|
||||
nextPt.x = x + prevU(y/2, x/2) * 2.0f;
|
||||
nextPt.y = y + prevV(y/2, x/2) * 2.0f;
|
||||
|
||||
for (int k = 0; k < c_iters; ++k)
|
||||
{
|
||||
if (nextPt.x < 0 || nextPt.x >= cols || nextPt.y < 0 || nextPt.y >= rows)
|
||||
{
|
||||
if (calcErr)
|
||||
err(y, x) = numeric_limits<float>::max();
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
int b1 = 0;
|
||||
int b2 = 0;
|
||||
|
||||
for (int i = 0; i < c_winSize_y; ++i)
|
||||
{
|
||||
for (int j = 0; j < c_winSize_x; ++j)
|
||||
{
|
||||
int I = I_patch[(threadIdx.y + i) * patchWidth + threadIdx.x + j];
|
||||
int J = tex2D(tex_Jf, nextPt.x - c_halfWin_x + j + 0.5f, nextPt.y - c_halfWin_y + i + 0.5f);
|
||||
|
||||
int diff = (J - I) * 32;
|
||||
|
||||
int dIdx = dIdx_patch[(threadIdx.y + i) * patchWidth + (threadIdx.x + j)];
|
||||
int dIdy = dIdy_patch[(threadIdx.y + i) * patchWidth + (threadIdx.x + j)];
|
||||
|
||||
b1 += diff * dIdx;
|
||||
b2 += diff * dIdy;
|
||||
}
|
||||
}
|
||||
|
||||
float2 delta;
|
||||
delta.x = A12 * b2 - A22 * b1;
|
||||
delta.y = A12 * b1 - A11 * b2;
|
||||
|
||||
nextPt.x += delta.x;
|
||||
nextPt.y += delta.y;
|
||||
|
||||
if (::fabs(delta.x) < 0.01f && ::fabs(delta.y) < 0.01f)
|
||||
break;
|
||||
}
|
||||
|
||||
u(y, x) = nextPt.x - x;
|
||||
v(y, x) = nextPt.y - y;
|
||||
|
||||
if (calcErr)
|
||||
{
|
||||
int errval = 0;
|
||||
|
||||
for (int i = 0; i < c_winSize_y; ++i)
|
||||
{
|
||||
for (int j = 0; j < c_winSize_x; ++j)
|
||||
{
|
||||
int I = I_patch[(threadIdx.y + i) * patchWidth + threadIdx.x + j];
|
||||
int J = tex2D(tex_Jf, nextPt.x - c_halfWin_x + j + 0.5f, nextPt.y - c_halfWin_y + i + 0.5f);
|
||||
|
||||
errval += ::abs(J - I);
|
||||
}
|
||||
}
|
||||
|
||||
err(y, x) = static_cast<float>(errval) / (c_winSize_x * c_winSize_y);
|
||||
}
|
||||
}
|
||||
|
||||
// Uploads the tracker parameters to constant memory.
//   winSize - search window size in pixels; the half window is derived as (winSize - 1) / 2
//   iters   - maximum number of refinement iterations
void loadConstants(int2 winSize, int iters)
{
    const int2 halfWin = make_int2((winSize.x - 1) / 2, (winSize.y - 1) / 2);

    // Window size.
    cudaSafeCall( cudaMemcpyToSymbol(c_winSize_x, &winSize.x, sizeof(int)) );
    cudaSafeCall( cudaMemcpyToSymbol(c_winSize_y, &winSize.y, sizeof(int)) );

    // Half window.
    cudaSafeCall( cudaMemcpyToSymbol(c_halfWin_x, &halfWin.x, sizeof(int)) );
    cudaSafeCall( cudaMemcpyToSymbol(c_halfWin_y, &halfWin.y, sizeof(int)) );

    // Iteration limit.
    cudaSafeCall( cudaMemcpyToSymbol(c_iters, &iters, sizeof(int)) );
}
|
||||
|
||||
// Sparse tracking entry point for single-channel float images.
// Binds I and J to tex_If / tex_Jf and dispatches to the sparse_caller instantiation
// matching the per-thread patch size.
// patch.x and patch.y index a 5x5 table, so both must lie in [1, 5]; no check is made here.
void sparse1(PtrStepSzf I, PtrStepSzf J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
             int level, dim3 block, dim3 patch, cudaStream_t stream)
{
    typedef void (*caller_t)(int rows, int cols, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
                             int level, dim3 block, cudaStream_t stream);

    // Row index is PATCH_Y - 1, column index is PATCH_X - 1.
    static const caller_t callers[5][5] =
    {
        {sparse_caller<1, 1, 1>, sparse_caller<1, 2, 1>, sparse_caller<1, 3, 1>, sparse_caller<1, 4, 1>, sparse_caller<1, 5, 1>},
        {sparse_caller<1, 1, 2>, sparse_caller<1, 2, 2>, sparse_caller<1, 3, 2>, sparse_caller<1, 4, 2>, sparse_caller<1, 5, 2>},
        {sparse_caller<1, 1, 3>, sparse_caller<1, 2, 3>, sparse_caller<1, 3, 3>, sparse_caller<1, 4, 3>, sparse_caller<1, 5, 3>},
        {sparse_caller<1, 1, 4>, sparse_caller<1, 2, 4>, sparse_caller<1, 3, 4>, sparse_caller<1, 4, 4>, sparse_caller<1, 5, 4>},
        {sparse_caller<1, 1, 5>, sparse_caller<1, 2, 5>, sparse_caller<1, 3, 5>, sparse_caller<1, 4, 5>, sparse_caller<1, 5, 5>}
    };

    bindTexture(&tex_If, I);
    bindTexture(&tex_Jf, J);

    const caller_t launch = callers[patch.y - 1][patch.x - 1];

    launch(I.rows, I.cols, prevPts, nextPts, status, err, ptcount, level, block, stream);
}
|
||||
|
||||
// Sparse tracking entry point for four-channel float images.
// Binds I and J to tex_If4 / tex_Jf4 and dispatches to the sparse_caller instantiation
// matching the per-thread patch size.
// patch.x and patch.y index a 5x5 table, so both must lie in [1, 5]; no check is made here.
void sparse4(PtrStepSz<float4> I, PtrStepSz<float4> J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
             int level, dim3 block, dim3 patch, cudaStream_t stream)
{
    typedef void (*caller_t)(int rows, int cols, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
                             int level, dim3 block, cudaStream_t stream);

    // Row index is PATCH_Y - 1, column index is PATCH_X - 1.
    static const caller_t callers[5][5] =
    {
        {sparse_caller<4, 1, 1>, sparse_caller<4, 2, 1>, sparse_caller<4, 3, 1>, sparse_caller<4, 4, 1>, sparse_caller<4, 5, 1>},
        {sparse_caller<4, 1, 2>, sparse_caller<4, 2, 2>, sparse_caller<4, 3, 2>, sparse_caller<4, 4, 2>, sparse_caller<4, 5, 2>},
        {sparse_caller<4, 1, 3>, sparse_caller<4, 2, 3>, sparse_caller<4, 3, 3>, sparse_caller<4, 4, 3>, sparse_caller<4, 5, 3>},
        {sparse_caller<4, 1, 4>, sparse_caller<4, 2, 4>, sparse_caller<4, 3, 4>, sparse_caller<4, 4, 4>, sparse_caller<4, 5, 4>},
        {sparse_caller<4, 1, 5>, sparse_caller<4, 2, 5>, sparse_caller<4, 3, 5>, sparse_caller<4, 4, 5>, sparse_caller<4, 5, 5>}
    };

    bindTexture(&tex_If4, I);
    bindTexture(&tex_Jf4, J);

    const caller_t launch = callers[patch.y - 1][patch.x - 1];

    launch(I.rows, I.cols, prevPts, nextPts, status, err, ptcount, level, block, stream);
}
|
||||
|
||||
// Dense tracking entry point: launches denseKernel over the whole image on `stream`.
//   I            - first image (8-bit), bound to tex_Ib
//   J            - second image (float), bound to tex_Jf
//   u, v         - output flow
//   prevU, prevV - flow of the coarser level
//   err          - optional error map; error computation is enabled when err.data is non-null
//   winSize      - window size, used here only to size the shared-memory tile
//                  (NOTE(review): the kernel reads the window from constant memory, so
//                  loadConstants() is presumably called with the same winSize - confirm)
// Blocks until completion only for the default stream.
void dense(PtrStepSzb I, PtrStepSzf J, PtrStepSzf u, PtrStepSzf v, PtrStepSzf prevU, PtrStepSzf prevV, PtrStepSzf err, int2 winSize, cudaStream_t stream)
{
    dim3 threads(16, 16);
    dim3 blocks(divUp(I.cols, threads.x), divUp(I.rows, threads.y));

    bindTexture(&tex_Ib, I);
    bindTexture(&tex_Jf, J);

    // Tile = block extended by the half window on each side; three int planes are staged.
    const int2 halfWin = make_int2((winSize.x - 1) / 2, (winSize.y - 1) / 2);
    const int tileW = threads.x + 2 * halfWin.x;
    const int tileH = threads.y + 2 * halfWin.y;
    const size_t sharedBytes = 3 * tileW * tileH * sizeof(int);

    if (err.data)
        denseKernel<true><<<blocks, threads, sharedBytes, stream>>>(u, v, prevU, prevV, err, I.rows, I.cols);
    else
        denseKernel<false><<<blocks, threads, sharedBytes, stream>>>(u, v, prevU, prevV, PtrStepf(), I.rows, I.cols);

    cudaSafeCall( cudaGetLastError() );

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}
|
||||
}
|
||||
|
||||
#endif /* CUDA_DISABLER */
|
332
modules/gpuoptflow/src/cuda/tvl1flow.cu
Normal file
332
modules/gpuoptflow/src/cuda/tvl1flow.cu
Normal file
@@ -0,0 +1,332 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/border_interpolate.hpp"
|
||||
#include "opencv2/core/cuda/limits.hpp"
|
||||
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::cudev;
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// centeredGradient
|
||||
|
||||
namespace tvl1flow
|
||||
{
|
||||
// Central-difference gradient of src, one thread per pixel.
// Neighbour indices are clamped to the image, so border pixels use a one-sided
// difference that is still scaled by 0.5.
__global__ void centeredGradientKernel(const PtrStepSzf src, PtrStepf dx, PtrStepf dy)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    const bool inside = x < src.cols && y < src.rows;
    if (!inside)
        return;

    const int xRight = ::min(x + 1, src.cols - 1);
    const int xLeft  = ::max(x - 1, 0);
    const int yDown  = ::min(y + 1, src.rows - 1);
    const int yUp    = ::max(y - 1, 0);

    dx(y, x) = 0.5f * (src(y, xRight) - src(y, xLeft));
    dy(y, x) = 0.5f * (src(yDown, x) - src(yUp, x));
}
|
||||
|
||||
// Host wrapper: computes the central-difference gradient of src into dx / dy.
// Launches on the default stream and waits for the device to finish.
void centeredGradient(PtrStepSzf src, PtrStepSzf dx, PtrStepSzf dy)
{
    const dim3 threads(32, 8);
    const dim3 blocks(divUp(src.cols, threads.x), divUp(src.rows, threads.y));

    centeredGradientKernel<<<blocks, threads>>>(src, dx, dy);

    cudaSafeCall( cudaGetLastError() );
    cudaSafeCall( cudaDeviceSynchronize() );
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// warpBackward
|
||||
|
||||
namespace tvl1flow
|
||||
{
|
||||
// Piecewise-cubic interpolation weight for a sample at signed distance x_.
//   |x| <= 1 : 1.5|x|^3 - 2.5|x|^2 + 1
//   |x| <  2 : -0.5|x|^3 + 2.5|x|^2 - 4|x| + 2
//   otherwise: 0
static __device__ __forceinline__ float bicubicCoeff(float x_)
{
    const float dist = fabsf(x_);

    if (dist <= 1.0f)
        return dist * dist * (1.5f * dist - 2.5f) + 1.0f;

    if (dist < 2.0f)
        return dist * (dist * (-0.5f * dist + 2.5f) - 4.0f) + 2.0f;

    return 0.0f;
}
|
||||
|
||||
// Texture references read by warpBackwardKernel: the second image and its x / y gradients.
// All three use unnormalized coordinates, point filtering and clamp addressing.
texture<float, cudaTextureType2D, cudaReadModeElementType> tex_I1 (false, cudaFilterModePoint, cudaAddressModeClamp);
texture<float, cudaTextureType2D, cudaReadModeElementType> tex_I1x(false, cudaFilterModePoint, cudaAddressModeClamp);
texture<float, cudaTextureType2D, cudaReadModeElementType> tex_I1y(false, cudaFilterModePoint, cudaAddressModeClamp);
|
||||
|
||||
// Warps I1 and its gradients by the current flow (u1, u2) with bicubic weights, one thread per pixel.
// Outputs per pixel:
//   I1w, I1wx, I1wy - warped image and warped gradients (weights normalized by their sum)
//   grad            - squared magnitude of the warped gradient
//   rho             - flow-independent part of the residual:
//                     I1w - I1wx * u1 - I1wy * u2 - I0
__global__ void warpBackwardKernel(const PtrStepSzf I0, const PtrStepf u1, const PtrStepf u2, PtrStepf I1w, PtrStepf I1wx, PtrStepf I1wy, PtrStepf grad, PtrStepf rho)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    const bool inside = x < I0.cols && y < I0.rows;
    if (!inside)
        return;

    const float u1Val = u1(y, x);
    const float u2Val = u2(y, x);

    // Sub-pixel position in I1 that this pixel maps to.
    const float wx = x + u1Val;
    const float wy = y + u2Val;

    // Integer taps within distance 2 of the warped position.
    const int xmin = ::ceilf(wx - 2.0f);
    const int xmax = ::floorf(wx + 2.0f);

    const int ymin = ::ceilf(wy - 2.0f);
    const int ymax = ::floorf(wy + 2.0f);

    float sum  = 0.0f;
    float sumx = 0.0f;
    float sumy = 0.0f;
    float wsum = 0.0f;

    for (int cy = ymin; cy <= ymax; ++cy)
    {
        for (int cx = xmin; cx <= xmax; ++cx)
        {
            const float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);

            sum  += w * tex2D(tex_I1 , cx, cy);
            sumx += w * tex2D(tex_I1x, cx, cy);
            sumy += w * tex2D(tex_I1y, cx, cy);

            wsum += w;
        }
    }

    // Normalize by the accumulated weight.
    const float coeff = 1.0f / wsum;

    const float I1wVal  = sum  * coeff;
    const float I1wxVal = sumx * coeff;
    const float I1wyVal = sumy * coeff;

    I1w(y, x)  = I1wVal;
    I1wx(y, x) = I1wxVal;
    I1wy(y, x) = I1wyVal;

    const float Ix2 = I1wxVal * I1wxVal;
    const float Iy2 = I1wyVal * I1wyVal;

    // |Grad(I1)|^2
    grad(y, x) = Ix2 + Iy2;

    // Constant part of the rho function.
    const float I0Val = I0(y, x);
    rho(y, x) = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val;
}
|
||||
|
||||
// Host wrapper for warpBackwardKernel.
// Binds I1 and its gradients to the tex_I1* textures, launches one thread per pixel of I0
// on the default stream, and waits for the device to finish.
void warpBackward(PtrStepSzf I0, PtrStepSzf I1, PtrStepSzf I1x, PtrStepSzf I1y, PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf I1w, PtrStepSzf I1wx, PtrStepSzf I1wy, PtrStepSzf grad, PtrStepSzf rho)
{
    bindTexture(&tex_I1 , I1);
    bindTexture(&tex_I1x, I1x);
    bindTexture(&tex_I1y, I1y);

    const dim3 threads(32, 8);
    const dim3 blocks(divUp(I0.cols, threads.x), divUp(I0.rows, threads.y));

    warpBackwardKernel<<<blocks, threads>>>(I0, u1, u2, I1w, I1wx, I1wy, grad, rho);

    cudaSafeCall( cudaGetLastError() );
    cudaSafeCall( cudaDeviceSynchronize() );
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// estimateU
|
||||
|
||||
namespace tvl1flow
|
||||
{
|
||||
// Backward-difference divergence of the vector field (v1, v2) at (y, x).
// On the first row / first column the missing neighbour is dropped from the difference.
__device__ float divergence(const PtrStepf& v1, const PtrStepf& v2, int y, int x)
{
    // Interior: both backward neighbours exist.
    if (x > 0 && y > 0)
    {
        const float v1x = v1(y, x) - v1(y, x - 1);
        const float v2y = v2(y, x) - v2(y - 1, x);
        return v1x + v2y;
    }

    // First column (no left neighbour).
    if (y > 0)
        return v1(y, 0) + v2(y, 0) - v2(y - 1, 0);

    // First row (no upper neighbour).
    if (x > 0)
        return v1(0, x) - v1(0, x - 1) + v2(0, x);

    // Top-left corner.
    return v1(0, 0) + v2(0, 0);
}
|
||||
|
||||
// One TV-L1 primal update per pixel.
// Reads the warped gradients, |grad|^2, the constant part of rho and the dual variables;
// updates the flow (u1, u2) in place and stores the squared change of the flow in `error`.
//   l_t   - threshold / step factor of the thresholding operator
//   theta - weight of the dual-variable divergence in the flow update
__global__ void estimateUKernel(const PtrStepSzf I1wx, const PtrStepf I1wy,
                                const PtrStepf grad, const PtrStepf rho_c,
                                const PtrStepf p11, const PtrStepf p12, const PtrStepf p21, const PtrStepf p22,
                                PtrStepf u1, PtrStepf u2, PtrStepf error,
                                const float l_t, const float theta)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    const bool inside = x < I1wx.cols && y < I1wx.rows;
    if (!inside)
        return;

    const float I1wxVal = I1wx(y, x);
    const float I1wyVal = I1wy(y, x);
    const float gradVal = grad(y, x);
    const float u1OldVal = u1(y, x);
    const float u2OldVal = u2(y, x);

    // Full residual: constant part plus the flow-dependent term.
    const float rho = rho_c(y, x) + (I1wxVal * u1OldVal + I1wyVal * u2OldVal);

    // Thresholding operator TH: step (d1, d2) along the warped gradient.
    // When no branch applies (flat gradient) the step stays zero.
    float d1 = 0.0f;
    float d2 = 0.0f;

    if (rho < -l_t * gradVal)
    {
        d1 = l_t * I1wxVal;
        d2 = l_t * I1wyVal;
    }
    else if (rho > l_t * gradVal)
    {
        d1 = -l_t * I1wxVal;
        d2 = -l_t * I1wyVal;
    }
    else if (gradVal > numeric_limits<float>::epsilon())
    {
        const float fi = -rho / gradVal;
        d1 = fi * I1wxVal;
        d2 = fi * I1wyVal;
    }

    // Auxiliary variable (v1, v2).
    const float v1 = u1OldVal + d1;
    const float v2 = u2OldVal + d2;

    // Divergence of the dual variables (p1, p2).
    const float div_p1 = divergence(p11, p12, y, x);
    const float div_p2 = divergence(p21, p22, y, x);

    // New flow estimate.
    const float u1NewVal = v1 + theta * div_p1;
    const float u2NewVal = v2 + theta * div_p2;

    u1(y, x) = u1NewVal;
    u2(y, x) = u2NewVal;

    // Squared change of the flow at this pixel.
    const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal);
    const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);
    error(y, x) = n1 + n2;
}
|
||||
|
||||
// Host wrapper for estimateUKernel: one thread per pixel of I1wx,
// launched on the default stream, followed by a device-wide synchronization.
void estimateU(PtrStepSzf I1wx, PtrStepSzf I1wy,
               PtrStepSzf grad, PtrStepSzf rho_c,
               PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22,
               PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf error,
               float l_t, float theta)
{
    const dim3 threads(32, 8);
    const dim3 blocks(divUp(I1wx.cols, threads.x), divUp(I1wx.rows, threads.y));

    estimateUKernel<<<blocks, threads>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, error, l_t, theta);

    cudaSafeCall( cudaGetLastError() );
    cudaSafeCall( cudaDeviceSynchronize() );
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// estimateDualVariables
|
||||
|
||||
namespace tvl1flow
|
||||
{
|
||||
// One TV-L1 dual update per pixel.
// Takes forward differences of the flow (neighbour index clamped to the image, so the
// difference is zero on the last row / column) and updates the dual variables in place:
//   p <- (p + taut * grad(u)) / (1 + taut * |grad(u)|)
__global__ void estimateDualVariablesKernel(const PtrStepSzf u1, const PtrStepf u2, PtrStepf p11, PtrStepf p12, PtrStepf p21, PtrStepf p22, const float taut)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    const bool inside = x < u1.cols && y < u1.rows;
    if (!inside)
        return;

    // Clamped forward neighbours; u2 is indexed with u1's dimensions.
    const int xNext = ::min(x + 1, u1.cols - 1);
    const int yNext = ::min(y + 1, u1.rows - 1);

    const float u1x = u1(y, xNext) - u1(y, x);
    const float u1y = u1(yNext, x) - u1(y, x);

    const float u2x = u2(y, xNext) - u2(y, x);
    const float u2y = u2(yNext, x) - u2(y, x);

    // Gradient magnitudes.
    const float g1 = ::hypotf(u1x, u1y);
    const float g2 = ::hypotf(u2x, u2y);

    const float ng1 = 1.0f + taut * g1;
    const float ng2 = 1.0f + taut * g2;

    p11(y, x) = (p11(y, x) + taut * u1x) / ng1;
    p12(y, x) = (p12(y, x) + taut * u1y) / ng1;
    p21(y, x) = (p21(y, x) + taut * u2x) / ng2;
    p22(y, x) = (p22(y, x) + taut * u2y) / ng2;
}
|
||||
|
||||
// Host wrapper for estimateDualVariablesKernel: one thread per pixel of u1,
// launched on the default stream, followed by a device-wide synchronization.
void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, float taut)
{
    const dim3 threads(32, 8);
    const dim3 blocks(divUp(u1.cols, threads.x), divUp(u1.rows, threads.y));

    estimateDualVariablesKernel<<<blocks, threads>>>(u1, u2, p11, p12, p21, p22, taut);

    cudaSafeCall( cudaGetLastError() );
    cudaSafeCall( cudaDeviceSynchronize() );
}
|
||||
}
|
||||
|
||||
#endif // !defined CUDA_DISABLER
|
242
modules/gpuoptflow/src/optflowbm.cpp
Normal file
242
modules/gpuoptflow/src/optflowbm.cpp
Normal file
@@ -0,0 +1,242 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::gpu;
|
||||
|
||||
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
|
||||
|
||||
// Stub for builds without CUDA (or with CUDA_DISABLER): ignores its arguments and calls throw_no_cuda().
void cv::gpu::calcOpticalFlowBM(const GpuMat&, const GpuMat&, Size, Size, Size, bool, GpuMat&, GpuMat&, GpuMat&, Stream&)
{
    throw_no_cuda();
}
|
||||
|
||||
// Stub for builds without CUDA (or with CUDA_DISABLER): ignores its arguments and calls throw_no_cuda().
void cv::gpu::FastOpticalFlowBM::operator ()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, int, int, Stream&)
{
    throw_no_cuda();
}
|
||||
|
||||
#else // HAVE_CUDA
|
||||
|
||||
namespace optflowbm
|
||||
{
|
||||
void calc(PtrStepSzb prev, PtrStepSzb curr, PtrStepSzf velx, PtrStepSzf vely, int2 blockSize, int2 shiftSize, bool usePrevious,
|
||||
int maxX, int maxY, int acceptLevel, int escapeLevel, const short2* ss, int ssCount, cudaStream_t stream);
|
||||
}
|
||||
|
||||
void cv::gpu::calcOpticalFlowBM(const GpuMat& prev, const GpuMat& curr, Size blockSize, Size shiftSize, Size maxRange, bool usePrevious, GpuMat& velx, GpuMat& vely, GpuMat& buf, Stream& st)
|
||||
{
|
||||
CV_Assert( prev.type() == CV_8UC1 );
|
||||
CV_Assert( curr.size() == prev.size() && curr.type() == prev.type() );
|
||||
|
||||
const Size velSize((prev.cols - blockSize.width + shiftSize.width) / shiftSize.width,
|
||||
(prev.rows - blockSize.height + shiftSize.height) / shiftSize.height);
|
||||
|
||||
velx.create(velSize, CV_32FC1);
|
||||
vely.create(velSize, CV_32FC1);
|
||||
|
||||
// scanning scheme coordinates
|
||||
std::vector<short2> ss((2 * maxRange.width + 1) * (2 * maxRange.height + 1));
|
||||
int ssCount = 0;
|
||||
|
||||
// Calculate scanning scheme
|
||||
const int minCount = std::min(maxRange.width, maxRange.height);
|
||||
|
||||
// use spiral search pattern
|
||||
//
|
||||
// 9 10 11 12
|
||||
// 8 1 2 13
|
||||
// 7 * 3 14
|
||||
// 6 5 4 15
|
||||
//... 20 19 18 17
|
||||
//
|
||||
|
||||
for (int i = 0; i < minCount; ++i)
|
||||
{
|
||||
// four cycles along sides
|
||||
int x = -i - 1, y = x;
|
||||
|
||||
// upper side
|
||||
for (int j = -i; j <= i + 1; ++j, ++ssCount)
|
||||
{
|
||||
ss[ssCount].x = ++x;
|
||||
ss[ssCount].y = y;
|
||||
}
|
||||
|
||||
// right side
|
||||
for (int j = -i; j <= i + 1; ++j, ++ssCount)
|
||||
{
|
||||
ss[ssCount].x = x;
|
||||
ss[ssCount].y = ++y;
|
||||
}
|
||||
|
||||
// bottom side
|
||||
for (int j = -i; j <= i + 1; ++j, ++ssCount)
|
||||
{
|
||||
ss[ssCount].x = --x;
|
||||
ss[ssCount].y = y;
|
||||
}
|
||||
|
||||
// left side
|
||||
for (int j = -i; j <= i + 1; ++j, ++ssCount)
|
||||
{
|
||||
ss[ssCount].x = x;
|
||||
ss[ssCount].y = --y;
|
||||
}
|
||||
}
|
||||
|
||||
// the rest part
|
||||
if (maxRange.width < maxRange.height)
|
||||
{
|
||||
const int xleft = -minCount;
|
||||
|
||||
// cycle by neighbor rings
|
||||
for (int i = minCount; i < maxRange.height; ++i)
|
||||
{
|
||||
// two cycles by x
|
||||
int y = -(i + 1);
|
||||
int x = xleft;
|
||||
|
||||
// upper side
|
||||
for (int j = -maxRange.width; j <= maxRange.width; ++j, ++ssCount, ++x)
|
||||
{
|
||||
ss[ssCount].x = x;
|
||||
ss[ssCount].y = y;
|
||||
}
|
||||
|
||||
x = xleft;
|
||||
y = -y;
|
||||
|
||||
// bottom side
|
||||
for (int j = -maxRange.width; j <= maxRange.width; ++j, ++ssCount, ++x)
|
||||
{
|
||||
ss[ssCount].x = x;
|
||||
ss[ssCount].y = y;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (maxRange.width > maxRange.height)
|
||||
{
|
||||
const int yupper = -minCount;
|
||||
|
||||
// cycle by neighbor rings
|
||||
for (int i = minCount; i < maxRange.width; ++i)
|
||||
{
|
||||
// two cycles by y
|
||||
int x = -(i + 1);
|
||||
int y = yupper;
|
||||
|
||||
// left side
|
||||
for (int j = -maxRange.height; j <= maxRange.height; ++j, ++ssCount, ++y)
|
||||
{
|
||||
ss[ssCount].x = x;
|
||||
ss[ssCount].y = y;
|
||||
}
|
||||
|
||||
y = yupper;
|
||||
x = -x;
|
||||
|
||||
// right side
|
||||
for (int j = -maxRange.height; j <= maxRange.height; ++j, ++ssCount, ++y)
|
||||
{
|
||||
ss[ssCount].x = x;
|
||||
ss[ssCount].y = y;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const cudaStream_t stream = StreamAccessor::getStream(st);
|
||||
|
||||
ensureSizeIsEnough(1, ssCount, CV_16SC2, buf);
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaMemcpy(buf.data, &ss[0], ssCount * sizeof(short2), cudaMemcpyHostToDevice) );
|
||||
else
|
||||
cudaSafeCall( cudaMemcpyAsync(buf.data, &ss[0], ssCount * sizeof(short2), cudaMemcpyHostToDevice, stream) );
|
||||
|
||||
const int maxX = prev.cols - blockSize.width;
|
||||
const int maxY = prev.rows - blockSize.height;
|
||||
|
||||
const int SMALL_DIFF = 2;
|
||||
const int BIG_DIFF = 128;
|
||||
|
||||
const int blSize = blockSize.area();
|
||||
const int acceptLevel = blSize * SMALL_DIFF;
|
||||
const int escapeLevel = blSize * BIG_DIFF;
|
||||
|
||||
optflowbm::calc(prev, curr, velx, vely,
|
||||
make_int2(blockSize.width, blockSize.height), make_int2(shiftSize.width, shiftSize.height), usePrevious,
|
||||
maxX, maxY, acceptLevel, escapeLevel, buf.ptr<short2>(), ssCount, stream);
|
||||
}
|
||||
|
||||
namespace optflowbm_fast
|
||||
{
|
||||
void get_buffer_size(int src_cols, int src_rows, int search_window, int block_window, int& buffer_cols, int& buffer_rows);
|
||||
|
||||
template <typename T>
|
||||
void calc(PtrStepSzb I0, PtrStepSzb I1, PtrStepSzf velx, PtrStepSzf vely, PtrStepi buffer, int search_window, int block_window, cudaStream_t stream);
|
||||
}
|
||||
|
||||
void cv::gpu::FastOpticalFlowBM::operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy, int search_window, int block_window, Stream& stream)
|
||||
{
|
||||
CV_Assert( I0.type() == CV_8UC1 );
|
||||
CV_Assert( I1.size() == I0.size() && I1.type() == I0.type() );
|
||||
|
||||
int border_size = search_window / 2 + block_window / 2;
|
||||
Size esize = I0.size() + Size(border_size, border_size) * 2;
|
||||
|
||||
ensureSizeIsEnough(esize, I0.type(), extended_I0);
|
||||
ensureSizeIsEnough(esize, I0.type(), extended_I1);
|
||||
|
||||
copyMakeBorder(I0, extended_I0, border_size, border_size, border_size, border_size, cv::BORDER_DEFAULT, Scalar(), stream);
|
||||
copyMakeBorder(I1, extended_I1, border_size, border_size, border_size, border_size, cv::BORDER_DEFAULT, Scalar(), stream);
|
||||
|
||||
GpuMat I0_hdr = extended_I0(Rect(Point2i(border_size, border_size), I0.size()));
|
||||
GpuMat I1_hdr = extended_I1(Rect(Point2i(border_size, border_size), I0.size()));
|
||||
|
||||
int bcols, brows;
|
||||
optflowbm_fast::get_buffer_size(I0.cols, I0.rows, search_window, block_window, bcols, brows);
|
||||
|
||||
ensureSizeIsEnough(brows, bcols, CV_32SC1, buffer);
|
||||
|
||||
flowx.create(I0.size(), CV_32FC1);
|
||||
flowy.create(I0.size(), CV_32FC1);
|
||||
|
||||
optflowbm_fast::calc<uchar>(I0_hdr, I1_hdr, flowx, flowy, buffer, search_window, block_window, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
#endif // HAVE_CUDA
|
237
modules/gpuoptflow/src/optical_flow.cpp
Normal file
237
modules/gpuoptflow/src/optical_flow.cpp
Normal file
@@ -0,0 +1,237 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::gpu;
|
||||
|
||||
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
|
||||
|
||||
// Stub compiled when HAVE_CUDA is undefined or CUDA_DISABLER is defined: ignores all arguments and calls throw_no_cuda().
void cv::gpu::BroxOpticalFlow::operator ()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
|
||||
// Stub compiled when HAVE_CUDA is undefined or CUDA_DISABLER is defined: ignores all arguments and calls throw_no_cuda().
void cv::gpu::interpolateFrames(const GpuMat&, const GpuMat&, const GpuMat&, const GpuMat&, const GpuMat&, const GpuMat&, float, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
|
||||
// Stub compiled when HAVE_CUDA is undefined or CUDA_DISABLER is defined: ignores all arguments and calls throw_no_cuda().
void cv::gpu::createOpticalFlowNeedleMap(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); }
|
||||
|
||||
#else
|
||||
|
||||
namespace
|
||||
{
|
||||
size_t getBufSize(const NCVBroxOpticalFlowDescriptor& desc, const NCVMatrix<Ncv32f>& frame0, const NCVMatrix<Ncv32f>& frame1,
|
||||
NCVMatrix<Ncv32f>& u, NCVMatrix<Ncv32f>& v, const cudaDeviceProp& devProp)
|
||||
{
|
||||
NCVMemStackAllocator gpuCounter(static_cast<Ncv32u>(devProp.textureAlignment));
|
||||
|
||||
ncvSafeCall( NCVBroxOpticalFlow(desc, gpuCounter, frame0, frame1, u, v, 0) );
|
||||
|
||||
return gpuCounter.maxSize();
|
||||
}
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
static void outputHandler(const String &msg) { CV_Error(cv::Error::GpuApiCallError, msg.c_str()); }
|
||||
}
|
||||
|
||||
void cv::gpu::BroxOpticalFlow::operator ()(const GpuMat& frame0, const GpuMat& frame1, GpuMat& u, GpuMat& v, Stream& s)
|
||||
{
|
||||
ncvSetDebugOutputHandler(outputHandler);
|
||||
|
||||
CV_Assert(frame0.type() == CV_32FC1);
|
||||
CV_Assert(frame1.size() == frame0.size() && frame1.type() == frame0.type());
|
||||
|
||||
u.create(frame0.size(), CV_32FC1);
|
||||
v.create(frame0.size(), CV_32FC1);
|
||||
|
||||
cudaDeviceProp devProp;
|
||||
cudaSafeCall( cudaGetDeviceProperties(&devProp, getDevice()) );
|
||||
|
||||
NCVBroxOpticalFlowDescriptor desc;
|
||||
|
||||
desc.alpha = alpha;
|
||||
desc.gamma = gamma;
|
||||
desc.scale_factor = scale_factor;
|
||||
desc.number_of_inner_iterations = inner_iterations;
|
||||
desc.number_of_outer_iterations = outer_iterations;
|
||||
desc.number_of_solver_iterations = solver_iterations;
|
||||
|
||||
NCVMemSegment frame0MemSeg;
|
||||
frame0MemSeg.begin.memtype = NCVMemoryTypeDevice;
|
||||
frame0MemSeg.begin.ptr = const_cast<uchar*>(frame0.data);
|
||||
frame0MemSeg.size = frame0.step * frame0.rows;
|
||||
|
||||
NCVMemSegment frame1MemSeg;
|
||||
frame1MemSeg.begin.memtype = NCVMemoryTypeDevice;
|
||||
frame1MemSeg.begin.ptr = const_cast<uchar*>(frame1.data);
|
||||
frame1MemSeg.size = frame1.step * frame1.rows;
|
||||
|
||||
NCVMemSegment uMemSeg;
|
||||
uMemSeg.begin.memtype = NCVMemoryTypeDevice;
|
||||
uMemSeg.begin.ptr = u.ptr();
|
||||
uMemSeg.size = u.step * u.rows;
|
||||
|
||||
NCVMemSegment vMemSeg;
|
||||
vMemSeg.begin.memtype = NCVMemoryTypeDevice;
|
||||
vMemSeg.begin.ptr = v.ptr();
|
||||
vMemSeg.size = v.step * v.rows;
|
||||
|
||||
NCVMatrixReuse<Ncv32f> frame0Mat(frame0MemSeg, static_cast<Ncv32u>(devProp.textureAlignment), frame0.cols, frame0.rows, static_cast<Ncv32u>(frame0.step));
|
||||
NCVMatrixReuse<Ncv32f> frame1Mat(frame1MemSeg, static_cast<Ncv32u>(devProp.textureAlignment), frame1.cols, frame1.rows, static_cast<Ncv32u>(frame1.step));
|
||||
NCVMatrixReuse<Ncv32f> uMat(uMemSeg, static_cast<Ncv32u>(devProp.textureAlignment), u.cols, u.rows, static_cast<Ncv32u>(u.step));
|
||||
NCVMatrixReuse<Ncv32f> vMat(vMemSeg, static_cast<Ncv32u>(devProp.textureAlignment), v.cols, v.rows, static_cast<Ncv32u>(v.step));
|
||||
|
||||
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||
|
||||
size_t bufSize = getBufSize(desc, frame0Mat, frame1Mat, uMat, vMat, devProp);
|
||||
|
||||
ensureSizeIsEnough(1, static_cast<int>(bufSize), CV_8UC1, buf);
|
||||
|
||||
NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, bufSize, static_cast<Ncv32u>(devProp.textureAlignment), buf.ptr());
|
||||
|
||||
ncvSafeCall( NCVBroxOpticalFlow(desc, gpuAllocator, frame0Mat, frame1Mat, uMat, vMat, stream) );
|
||||
}
|
||||
|
||||
void cv::gpu::interpolateFrames(const GpuMat& frame0, const GpuMat& frame1, const GpuMat& fu, const GpuMat& fv, const GpuMat& bu, const GpuMat& bv,
|
||||
float pos, GpuMat& newFrame, GpuMat& buf, Stream& s)
|
||||
{
|
||||
CV_Assert(frame0.type() == CV_32FC1);
|
||||
CV_Assert(frame1.size() == frame0.size() && frame1.type() == frame0.type());
|
||||
CV_Assert(fu.size() == frame0.size() && fu.type() == frame0.type());
|
||||
CV_Assert(fv.size() == frame0.size() && fv.type() == frame0.type());
|
||||
CV_Assert(bu.size() == frame0.size() && bu.type() == frame0.type());
|
||||
CV_Assert(bv.size() == frame0.size() && bv.type() == frame0.type());
|
||||
|
||||
newFrame.create(frame0.size(), frame0.type());
|
||||
|
||||
buf.create(6 * frame0.rows, frame0.cols, CV_32FC1);
|
||||
buf.setTo(Scalar::all(0));
|
||||
|
||||
// occlusion masks
|
||||
GpuMat occ0 = buf.rowRange(0 * frame0.rows, 1 * frame0.rows);
|
||||
GpuMat occ1 = buf.rowRange(1 * frame0.rows, 2 * frame0.rows);
|
||||
|
||||
// interpolated forward flow
|
||||
GpuMat fui = buf.rowRange(2 * frame0.rows, 3 * frame0.rows);
|
||||
GpuMat fvi = buf.rowRange(3 * frame0.rows, 4 * frame0.rows);
|
||||
|
||||
// interpolated backward flow
|
||||
GpuMat bui = buf.rowRange(4 * frame0.rows, 5 * frame0.rows);
|
||||
GpuMat bvi = buf.rowRange(5 * frame0.rows, 6 * frame0.rows);
|
||||
|
||||
size_t step = frame0.step;
|
||||
|
||||
CV_Assert(frame1.step == step && fu.step == step && fv.step == step && bu.step == step && bv.step == step && newFrame.step == step && buf.step == step);
|
||||
|
||||
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||
NppStStreamHandler h(stream);
|
||||
|
||||
NppStInterpolationState state;
|
||||
|
||||
state.size = NcvSize32u(frame0.cols, frame0.rows);
|
||||
state.nStep = static_cast<Ncv32u>(step);
|
||||
state.pSrcFrame0 = const_cast<Ncv32f*>(frame0.ptr<Ncv32f>());
|
||||
state.pSrcFrame1 = const_cast<Ncv32f*>(frame1.ptr<Ncv32f>());
|
||||
state.pFU = const_cast<Ncv32f*>(fu.ptr<Ncv32f>());
|
||||
state.pFV = const_cast<Ncv32f*>(fv.ptr<Ncv32f>());
|
||||
state.pBU = const_cast<Ncv32f*>(bu.ptr<Ncv32f>());
|
||||
state.pBV = const_cast<Ncv32f*>(bv.ptr<Ncv32f>());
|
||||
state.pos = pos;
|
||||
state.pNewFrame = newFrame.ptr<Ncv32f>();
|
||||
state.ppBuffers[0] = occ0.ptr<Ncv32f>();
|
||||
state.ppBuffers[1] = occ1.ptr<Ncv32f>();
|
||||
state.ppBuffers[2] = fui.ptr<Ncv32f>();
|
||||
state.ppBuffers[3] = fvi.ptr<Ncv32f>();
|
||||
state.ppBuffers[4] = bui.ptr<Ncv32f>();
|
||||
state.ppBuffers[5] = bvi.ptr<Ncv32f>();
|
||||
|
||||
ncvSafeCall( nppiStInterpolateFrames(&state) );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
namespace optical_flow
|
||||
{
|
||||
void NeedleMapAverage_gpu(PtrStepSzf u, PtrStepSzf v, PtrStepSzf u_avg, PtrStepSzf v_avg);
|
||||
void CreateOpticalFlowNeedleMap_gpu(PtrStepSzf u_avg, PtrStepSzf v_avg, float* vertex_buffer, float* color_data, float max_flow, float xscale, float yscale);
|
||||
}
|
||||
}}}
|
||||
|
||||
void cv::gpu::createOpticalFlowNeedleMap(const GpuMat& u, const GpuMat& v, GpuMat& vertex, GpuMat& colors)
|
||||
{
|
||||
using namespace cv::gpu::cudev::optical_flow;
|
||||
|
||||
CV_Assert(u.type() == CV_32FC1);
|
||||
CV_Assert(v.type() == u.type() && v.size() == u.size());
|
||||
|
||||
const int NEEDLE_MAP_SCALE = 16;
|
||||
|
||||
const int x_needles = u.cols / NEEDLE_MAP_SCALE;
|
||||
const int y_needles = u.rows / NEEDLE_MAP_SCALE;
|
||||
|
||||
GpuMat u_avg(y_needles, x_needles, CV_32FC1);
|
||||
GpuMat v_avg(y_needles, x_needles, CV_32FC1);
|
||||
|
||||
NeedleMapAverage_gpu(u, v, u_avg, v_avg);
|
||||
|
||||
const int NUM_VERTS_PER_ARROW = 6;
|
||||
|
||||
const int num_arrows = x_needles * y_needles * NUM_VERTS_PER_ARROW;
|
||||
|
||||
vertex.create(1, num_arrows, CV_32FC3);
|
||||
colors.create(1, num_arrows, CV_32FC3);
|
||||
|
||||
colors.setTo(Scalar::all(1.0));
|
||||
|
||||
double uMax, vMax;
|
||||
minMax(u_avg, 0, &uMax);
|
||||
minMax(v_avg, 0, &vMax);
|
||||
|
||||
float max_flow = static_cast<float>(std::sqrt(uMax * uMax + vMax * vMax));
|
||||
|
||||
CreateOpticalFlowNeedleMap_gpu(u_avg, v_avg, vertex.ptr<float>(), colors.ptr<float>(), max_flow, 1.0f / u.cols, 1.0f / u.rows);
|
||||
|
||||
cvtColor(colors, colors, COLOR_HSV2RGB);
|
||||
}
|
||||
|
||||
#endif /* HAVE_CUDA */
|
409
modules/gpuoptflow/src/optical_flow_farneback.cpp
Normal file
409
modules/gpuoptflow/src/optical_flow_farneback.cpp
Normal file
@@ -0,0 +1,409 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
|
||||
#define MIN_SIZE 32
|
||||
|
||||
#define S(x) StreamAccessor::getStream(x)
|
||||
|
||||
// GPU resize() is fast, but it differs from its CPU counterpart. Disabling this flag
// leads to inefficient code; it is meant for debugging purposes only.
|
||||
#define ENABLE_GPU_RESIZE 1
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::gpu;
|
||||
|
||||
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
|
||||
|
||||
// Stub compiled when HAVE_CUDA is undefined or CUDA_DISABLER is defined: ignores all arguments and calls throw_no_cuda().
void cv::gpu::FarnebackOpticalFlow::operator ()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
|
||||
|
||||
#else
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback
|
||||
{
|
||||
void setPolynomialExpansionConsts(
|
||||
int polyN, const float *g, const float *xg, const float *xxg,
|
||||
float ig11, float ig03, float ig33, float ig55);
|
||||
|
||||
void polynomialExpansionGpu(const PtrStepSzf &src, int polyN, PtrStepSzf dst, cudaStream_t stream);
|
||||
|
||||
void setUpdateMatricesConsts();
|
||||
|
||||
void updateMatricesGpu(
|
||||
const PtrStepSzf flowx, const PtrStepSzf flowy, const PtrStepSzf R0, const PtrStepSzf R1,
|
||||
PtrStepSzf M, cudaStream_t stream);
|
||||
|
||||
void updateFlowGpu(
|
||||
const PtrStepSzf M, PtrStepSzf flowx, PtrStepSzf flowy, cudaStream_t stream);
|
||||
|
||||
/*void boxFilterGpu(const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream);*/
|
||||
|
||||
void boxFilter5Gpu(const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream);
|
||||
|
||||
void boxFilter5Gpu_CC11(const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream);
|
||||
|
||||
void setGaussianBlurKernel(const float *gKer, int ksizeHalf);
|
||||
|
||||
void gaussianBlurGpu(
|
||||
const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, int borderType, cudaStream_t stream);
|
||||
|
||||
void gaussianBlur5Gpu(
|
||||
const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, int borderType, cudaStream_t stream);
|
||||
|
||||
void gaussianBlur5Gpu_CC11(
|
||||
const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, int borderType, cudaStream_t stream);
|
||||
|
||||
}}}} // namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback
|
||||
|
||||
|
||||
void cv::gpu::FarnebackOpticalFlow::prepareGaussian(
|
||||
int n, double sigma, float *g, float *xg, float *xxg,
|
||||
double &ig11, double &ig03, double &ig33, double &ig55)
|
||||
{
|
||||
double s = 0.;
|
||||
for (int x = -n; x <= n; x++)
|
||||
{
|
||||
g[x] = (float)std::exp(-x*x/(2*sigma*sigma));
|
||||
s += g[x];
|
||||
}
|
||||
|
||||
s = 1./s;
|
||||
for (int x = -n; x <= n; x++)
|
||||
{
|
||||
g[x] = (float)(g[x]*s);
|
||||
xg[x] = (float)(x*g[x]);
|
||||
xxg[x] = (float)(x*x*g[x]);
|
||||
}
|
||||
|
||||
Mat_<double> G(6, 6);
|
||||
G.setTo(0);
|
||||
|
||||
for (int y = -n; y <= n; y++)
|
||||
{
|
||||
for (int x = -n; x <= n; x++)
|
||||
{
|
||||
G(0,0) += g[y]*g[x];
|
||||
G(1,1) += g[y]*g[x]*x*x;
|
||||
G(3,3) += g[y]*g[x]*x*x*x*x;
|
||||
G(5,5) += g[y]*g[x]*x*x*y*y;
|
||||
}
|
||||
}
|
||||
|
||||
//G[0][0] = 1.;
|
||||
G(2,2) = G(0,3) = G(0,4) = G(3,0) = G(4,0) = G(1,1);
|
||||
G(4,4) = G(3,3);
|
||||
G(3,4) = G(4,3) = G(5,5);
|
||||
|
||||
// invG:
|
||||
// [ x e e ]
|
||||
// [ y ]
|
||||
// [ y ]
|
||||
// [ e z ]
|
||||
// [ e z ]
|
||||
// [ u ]
|
||||
Mat_<double> invG = G.inv(DECOMP_CHOLESKY);
|
||||
|
||||
ig11 = invG(1,1);
|
||||
ig03 = invG(0,3);
|
||||
ig33 = invG(3,3);
|
||||
ig55 = invG(5,5);
|
||||
}
|
||||
|
||||
|
||||
void cv::gpu::FarnebackOpticalFlow::setPolynomialExpansionConsts(int n, double sigma)
|
||||
{
|
||||
std::vector<float> buf(n*6 + 3);
|
||||
float* g = &buf[0] + n;
|
||||
float* xg = g + n*2 + 1;
|
||||
float* xxg = xg + n*2 + 1;
|
||||
|
||||
if (sigma < FLT_EPSILON)
|
||||
sigma = n*0.3;
|
||||
|
||||
double ig11, ig03, ig33, ig55;
|
||||
prepareGaussian(n, sigma, g, xg, xxg, ig11, ig03, ig33, ig55);
|
||||
|
||||
cudev::optflow_farneback::setPolynomialExpansionConsts(n, g, xg, xxg, static_cast<float>(ig11), static_cast<float>(ig03), static_cast<float>(ig33), static_cast<float>(ig55));
|
||||
}
|
||||
|
||||
|
||||
void cv::gpu::FarnebackOpticalFlow::updateFlow_boxFilter(
|
||||
const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat &flowy,
|
||||
GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[])
|
||||
{
|
||||
if (deviceSupports(FEATURE_SET_COMPUTE_12))
|
||||
cudev::optflow_farneback::boxFilter5Gpu(M, blockSize/2, bufM, S(streams[0]));
|
||||
else
|
||||
cudev::optflow_farneback::boxFilter5Gpu_CC11(M, blockSize/2, bufM, S(streams[0]));
|
||||
swap(M, bufM);
|
||||
|
||||
for (int i = 1; i < 5; ++i)
|
||||
streams[i].waitForCompletion();
|
||||
cudev::optflow_farneback::updateFlowGpu(M, flowx, flowy, S(streams[0]));
|
||||
|
||||
if (updateMatrices)
|
||||
cudev::optflow_farneback::updateMatricesGpu(flowx, flowy, R0, R1, M, S(streams[0]));
|
||||
}
|
||||
|
||||
|
||||
void cv::gpu::FarnebackOpticalFlow::updateFlow_gaussianBlur(
|
||||
const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat& flowy,
|
||||
GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[])
|
||||
{
|
||||
if (deviceSupports(FEATURE_SET_COMPUTE_12))
|
||||
cudev::optflow_farneback::gaussianBlur5Gpu(
|
||||
M, blockSize/2, bufM, BORDER_REPLICATE_GPU, S(streams[0]));
|
||||
else
|
||||
cudev::optflow_farneback::gaussianBlur5Gpu_CC11(
|
||||
M, blockSize/2, bufM, BORDER_REPLICATE_GPU, S(streams[0]));
|
||||
swap(M, bufM);
|
||||
|
||||
cudev::optflow_farneback::updateFlowGpu(M, flowx, flowy, S(streams[0]));
|
||||
|
||||
if (updateMatrices)
|
||||
cudev::optflow_farneback::updateMatricesGpu(flowx, flowy, R0, R1, M, S(streams[0]));
|
||||
}
|
||||
|
||||
|
||||
void cv::gpu::FarnebackOpticalFlow::operator ()(
|
||||
const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &s)
|
||||
{
|
||||
CV_Assert(frame0.channels() == 1 && frame1.channels() == 1);
|
||||
CV_Assert(frame0.size() == frame1.size());
|
||||
CV_Assert(polyN == 5 || polyN == 7);
|
||||
CV_Assert(!fastPyramids || std::abs(pyrScale - 0.5) < 1e-6);
|
||||
|
||||
Stream streams[5];
|
||||
if (S(s))
|
||||
streams[0] = s;
|
||||
|
||||
Size size = frame0.size();
|
||||
GpuMat prevFlowX, prevFlowY, curFlowX, curFlowY;
|
||||
|
||||
flowx.create(size, CV_32F);
|
||||
flowy.create(size, CV_32F);
|
||||
GpuMat flowx0 = flowx;
|
||||
GpuMat flowy0 = flowy;
|
||||
|
||||
// Crop unnecessary levels
|
||||
double scale = 1;
|
||||
int numLevelsCropped = 0;
|
||||
for (; numLevelsCropped < numLevels; numLevelsCropped++)
|
||||
{
|
||||
scale *= pyrScale;
|
||||
if (size.width*scale < MIN_SIZE || size.height*scale < MIN_SIZE)
|
||||
break;
|
||||
}
|
||||
|
||||
streams[0].enqueueConvert(frame0, frames_[0], CV_32F);
|
||||
streams[1].enqueueConvert(frame1, frames_[1], CV_32F);
|
||||
|
||||
if (fastPyramids)
|
||||
{
|
||||
// Build Gaussian pyramids using pyrDown()
|
||||
pyramid0_.resize(numLevelsCropped + 1);
|
||||
pyramid1_.resize(numLevelsCropped + 1);
|
||||
pyramid0_[0] = frames_[0];
|
||||
pyramid1_[0] = frames_[1];
|
||||
for (int i = 1; i <= numLevelsCropped; ++i)
|
||||
{
|
||||
pyrDown(pyramid0_[i - 1], pyramid0_[i], streams[0]);
|
||||
pyrDown(pyramid1_[i - 1], pyramid1_[i], streams[1]);
|
||||
}
|
||||
}
|
||||
|
||||
setPolynomialExpansionConsts(polyN, polySigma);
|
||||
cudev::optflow_farneback::setUpdateMatricesConsts();
|
||||
|
||||
for (int k = numLevelsCropped; k >= 0; k--)
|
||||
{
|
||||
streams[0].waitForCompletion();
|
||||
|
||||
scale = 1;
|
||||
for (int i = 0; i < k; i++)
|
||||
scale *= pyrScale;
|
||||
|
||||
double sigma = (1./scale - 1) * 0.5;
|
||||
int smoothSize = cvRound(sigma*5) | 1;
|
||||
smoothSize = std::max(smoothSize, 3);
|
||||
|
||||
int width = cvRound(size.width*scale);
|
||||
int height = cvRound(size.height*scale);
|
||||
|
||||
if (fastPyramids)
|
||||
{
|
||||
width = pyramid0_[k].cols;
|
||||
height = pyramid0_[k].rows;
|
||||
}
|
||||
|
||||
if (k > 0)
|
||||
{
|
||||
curFlowX.create(height, width, CV_32F);
|
||||
curFlowY.create(height, width, CV_32F);
|
||||
}
|
||||
else
|
||||
{
|
||||
curFlowX = flowx0;
|
||||
curFlowY = flowy0;
|
||||
}
|
||||
|
||||
if (!prevFlowX.data)
|
||||
{
|
||||
if (flags & OPTFLOW_USE_INITIAL_FLOW)
|
||||
{
|
||||
#if ENABLE_GPU_RESIZE
|
||||
resize(flowx0, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]);
|
||||
resize(flowy0, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]);
|
||||
streams[0].enqueueConvert(curFlowX, curFlowX, curFlowX.depth(), scale);
|
||||
streams[1].enqueueConvert(curFlowY, curFlowY, curFlowY.depth(), scale);
|
||||
#else
|
||||
Mat tmp1, tmp2;
|
||||
flowx0.download(tmp1);
|
||||
resize(tmp1, tmp2, Size(width, height), 0, 0, INTER_AREA);
|
||||
tmp2 *= scale;
|
||||
curFlowX.upload(tmp2);
|
||||
flowy0.download(tmp1);
|
||||
resize(tmp1, tmp2, Size(width, height), 0, 0, INTER_AREA);
|
||||
tmp2 *= scale;
|
||||
curFlowY.upload(tmp2);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
streams[0].enqueueMemSet(curFlowX, 0);
|
||||
streams[1].enqueueMemSet(curFlowY, 0);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
#if ENABLE_GPU_RESIZE
|
||||
resize(prevFlowX, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]);
|
||||
resize(prevFlowY, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]);
|
||||
streams[0].enqueueConvert(curFlowX, curFlowX, curFlowX.depth(), 1./pyrScale);
|
||||
streams[1].enqueueConvert(curFlowY, curFlowY, curFlowY.depth(), 1./pyrScale);
|
||||
#else
|
||||
Mat tmp1, tmp2;
|
||||
prevFlowX.download(tmp1);
|
||||
resize(tmp1, tmp2, Size(width, height), 0, 0, INTER_LINEAR);
|
||||
tmp2 *= 1./pyrScale;
|
||||
curFlowX.upload(tmp2);
|
||||
prevFlowY.download(tmp1);
|
||||
resize(tmp1, tmp2, Size(width, height), 0, 0, INTER_LINEAR);
|
||||
tmp2 *= 1./pyrScale;
|
||||
curFlowY.upload(tmp2);
|
||||
#endif
|
||||
}
|
||||
|
||||
GpuMat M = allocMatFromBuf(5*height, width, CV_32F, M_);
|
||||
GpuMat bufM = allocMatFromBuf(5*height, width, CV_32F, bufM_);
|
||||
GpuMat R[2] =
|
||||
{
|
||||
allocMatFromBuf(5*height, width, CV_32F, R_[0]),
|
||||
allocMatFromBuf(5*height, width, CV_32F, R_[1])
|
||||
};
|
||||
|
||||
if (fastPyramids)
|
||||
{
|
||||
cudev::optflow_farneback::polynomialExpansionGpu(pyramid0_[k], polyN, R[0], S(streams[0]));
|
||||
cudev::optflow_farneback::polynomialExpansionGpu(pyramid1_[k], polyN, R[1], S(streams[1]));
|
||||
}
|
||||
else
|
||||
{
|
||||
GpuMat blurredFrame[2] =
|
||||
{
|
||||
allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[0]),
|
||||
allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[1])
|
||||
};
|
||||
GpuMat pyrLevel[2] =
|
||||
{
|
||||
allocMatFromBuf(height, width, CV_32F, pyrLevel_[0]),
|
||||
allocMatFromBuf(height, width, CV_32F, pyrLevel_[1])
|
||||
};
|
||||
|
||||
Mat g = getGaussianKernel(smoothSize, sigma, CV_32F);
|
||||
cudev::optflow_farneback::setGaussianBlurKernel(g.ptr<float>(smoothSize/2), smoothSize/2);
|
||||
|
||||
for (int i = 0; i < 2; i++)
|
||||
{
|
||||
cudev::optflow_farneback::gaussianBlurGpu(
|
||||
frames_[i], smoothSize/2, blurredFrame[i], BORDER_REFLECT101_GPU, S(streams[i]));
|
||||
#if ENABLE_GPU_RESIZE
|
||||
resize(blurredFrame[i], pyrLevel[i], Size(width, height), INTER_LINEAR, streams[i]);
|
||||
#else
|
||||
Mat tmp1, tmp2;
|
||||
tmp[i].download(tmp1);
|
||||
resize(tmp1, tmp2, Size(width, height), INTER_LINEAR);
|
||||
I[i].upload(tmp2);
|
||||
#endif
|
||||
cudev::optflow_farneback::polynomialExpansionGpu(pyrLevel[i], polyN, R[i], S(streams[i]));
|
||||
}
|
||||
}
|
||||
|
||||
streams[1].waitForCompletion();
|
||||
cudev::optflow_farneback::updateMatricesGpu(curFlowX, curFlowY, R[0], R[1], M, S(streams[0]));
|
||||
|
||||
if (flags & OPTFLOW_FARNEBACK_GAUSSIAN)
|
||||
{
|
||||
Mat g = getGaussianKernel(winSize, winSize/2*0.3f, CV_32F);
|
||||
cudev::optflow_farneback::setGaussianBlurKernel(g.ptr<float>(winSize/2), winSize/2);
|
||||
}
|
||||
for (int i = 0; i < numIters; i++)
|
||||
{
|
||||
if (flags & OPTFLOW_FARNEBACK_GAUSSIAN)
|
||||
updateFlow_gaussianBlur(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize, i < numIters-1, streams);
|
||||
else
|
||||
updateFlow_boxFilter(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize, i < numIters-1, streams);
|
||||
}
|
||||
|
||||
prevFlowX = curFlowX;
|
||||
prevFlowY = curFlowY;
|
||||
}
|
||||
|
||||
flowx = curFlowX;
|
||||
flowy = curFlowY;
|
||||
|
||||
if (!S(s))
|
||||
streams[0].waitForCompletion();
|
||||
}
|
||||
|
||||
#endif
|
43
modules/gpuoptflow/src/precomp.cpp
Normal file
43
modules/gpuoptflow/src/precomp.cpp
Normal file
@@ -0,0 +1,43 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
62
modules/gpuoptflow/src/precomp.hpp
Normal file
62
modules/gpuoptflow/src/precomp.hpp
Normal file
@@ -0,0 +1,62 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_PRECOMP_H__
|
||||
#define __OPENCV_PRECOMP_H__
|
||||
|
||||
#include <limits>
|
||||
|
||||
#include "opencv2/gpuoptflow.hpp"
|
||||
#include "opencv2/gpuarithm.hpp"
|
||||
#include "opencv2/gpuwarping.hpp"
|
||||
|
||||
#include "opencv2/video.hpp"
|
||||
|
||||
#include "opencv2/core/gpu_private.hpp"
|
||||
|
||||
#include "opencv2/opencv_modules.hpp"
|
||||
|
||||
#ifdef HAVE_OPENCV_GPULEGACY
|
||||
# include "opencv2/gpulegacy/private.hpp"
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCV_PRECOMP_H__ */
|
250
modules/gpuoptflow/src/pyrlk.cpp
Normal file
250
modules/gpuoptflow/src/pyrlk.cpp
Normal file
@@ -0,0 +1,250 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::gpu;
|
||||
|
||||
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
|
||||
|
||||
cv::gpu::PyrLKOpticalFlow::PyrLKOpticalFlow() { throw_no_cuda(); }
|
||||
void cv::gpu::PyrLKOpticalFlow::sparse(const GpuMat&, const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat*) { throw_no_cuda(); }
|
||||
void cv::gpu::PyrLKOpticalFlow::dense(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat*) { throw_no_cuda(); }
|
||||
void cv::gpu::PyrLKOpticalFlow::releaseMemory() {}
|
||||
|
||||
#else /* !defined (HAVE_CUDA) */
|
||||
|
||||
namespace pyrlk
|
||||
{
|
||||
void loadConstants(int2 winSize, int iters);
|
||||
|
||||
void sparse1(PtrStepSzf I, PtrStepSzf J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
|
||||
int level, dim3 block, dim3 patch, cudaStream_t stream = 0);
|
||||
void sparse4(PtrStepSz<float4> I, PtrStepSz<float4> J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
|
||||
int level, dim3 block, dim3 patch, cudaStream_t stream = 0);
|
||||
|
||||
void dense(PtrStepSzb I, PtrStepSzf J, PtrStepSzf u, PtrStepSzf v, PtrStepSzf prevU, PtrStepSzf prevV,
|
||||
PtrStepSzf err, int2 winSize, cudaStream_t stream = 0);
|
||||
}
|
||||
|
||||
// Sets the default tracking parameters: a 21x21 search window, pyramid
// levels 0..3, 30 iterations, and no user-supplied initial flow.
cv::gpu::PyrLKOpticalFlow::PyrLKOpticalFlow()
{
    useInitialFlow = false;

    iters    = 30;
    maxLevel = 3;
    winSize  = Size(21, 21);
}
|
||||
|
||||
namespace
|
||||
{
|
||||
void calcPatchSize(cv::Size winSize, dim3& block, dim3& patch)
|
||||
{
|
||||
if (winSize.width > 32 && winSize.width > 2 * winSize.height)
|
||||
{
|
||||
block.x = deviceSupports(FEATURE_SET_COMPUTE_12) ? 32 : 16;
|
||||
block.y = 8;
|
||||
}
|
||||
else
|
||||
{
|
||||
block.x = 16;
|
||||
block.y = deviceSupports(FEATURE_SET_COMPUTE_12) ? 16 : 8;
|
||||
}
|
||||
|
||||
patch.x = (winSize.width + block.x - 1) / block.x;
|
||||
patch.y = (winSize.height + block.y - 1) / block.y;
|
||||
|
||||
block.z = patch.z = 1;
|
||||
}
|
||||
}
|
||||
|
||||
void cv::gpu::PyrLKOpticalFlow::sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts, GpuMat& status, GpuMat* err)
|
||||
{
|
||||
if (prevPts.empty())
|
||||
{
|
||||
nextPts.release();
|
||||
status.release();
|
||||
if (err) err->release();
|
||||
return;
|
||||
}
|
||||
|
||||
dim3 block, patch;
|
||||
calcPatchSize(winSize, block, patch);
|
||||
|
||||
CV_Assert(prevImg.channels() == 1 || prevImg.channels() == 3 || prevImg.channels() == 4);
|
||||
CV_Assert(prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type());
|
||||
CV_Assert(maxLevel >= 0);
|
||||
CV_Assert(winSize.width > 2 && winSize.height > 2);
|
||||
CV_Assert(patch.x > 0 && patch.x < 6 && patch.y > 0 && patch.y < 6);
|
||||
CV_Assert(prevPts.rows == 1 && prevPts.type() == CV_32FC2);
|
||||
|
||||
if (useInitialFlow)
|
||||
CV_Assert(nextPts.size() == prevPts.size() && nextPts.type() == CV_32FC2);
|
||||
else
|
||||
ensureSizeIsEnough(1, prevPts.cols, prevPts.type(), nextPts);
|
||||
|
||||
GpuMat temp1 = (useInitialFlow ? nextPts : prevPts).reshape(1);
|
||||
GpuMat temp2 = nextPts.reshape(1);
|
||||
multiply(temp1, Scalar::all(1.0 / (1 << maxLevel) / 2.0), temp2);
|
||||
|
||||
ensureSizeIsEnough(1, prevPts.cols, CV_8UC1, status);
|
||||
status.setTo(Scalar::all(1));
|
||||
|
||||
if (err)
|
||||
ensureSizeIsEnough(1, prevPts.cols, CV_32FC1, *err);
|
||||
|
||||
// build the image pyramids.
|
||||
|
||||
prevPyr_.resize(maxLevel + 1);
|
||||
nextPyr_.resize(maxLevel + 1);
|
||||
|
||||
int cn = prevImg.channels();
|
||||
|
||||
if (cn == 1 || cn == 4)
|
||||
{
|
||||
prevImg.convertTo(prevPyr_[0], CV_32F);
|
||||
nextImg.convertTo(nextPyr_[0], CV_32F);
|
||||
}
|
||||
else
|
||||
{
|
||||
cvtColor(prevImg, buf_, COLOR_BGR2BGRA);
|
||||
buf_.convertTo(prevPyr_[0], CV_32F);
|
||||
|
||||
cvtColor(nextImg, buf_, COLOR_BGR2BGRA);
|
||||
buf_.convertTo(nextPyr_[0], CV_32F);
|
||||
}
|
||||
|
||||
for (int level = 1; level <= maxLevel; ++level)
|
||||
{
|
||||
pyrDown(prevPyr_[level - 1], prevPyr_[level]);
|
||||
pyrDown(nextPyr_[level - 1], nextPyr_[level]);
|
||||
}
|
||||
|
||||
pyrlk::loadConstants(make_int2(winSize.width, winSize.height), iters);
|
||||
|
||||
for (int level = maxLevel; level >= 0; level--)
|
||||
{
|
||||
if (cn == 1)
|
||||
{
|
||||
pyrlk::sparse1(prevPyr_[level], nextPyr_[level],
|
||||
prevPts.ptr<float2>(), nextPts.ptr<float2>(), status.ptr(), level == 0 && err ? err->ptr<float>() : 0, prevPts.cols,
|
||||
level, block, patch);
|
||||
}
|
||||
else
|
||||
{
|
||||
pyrlk::sparse4(prevPyr_[level], nextPyr_[level],
|
||||
prevPts.ptr<float2>(), nextPts.ptr<float2>(), status.ptr(), level == 0 && err ? err->ptr<float>() : 0, prevPts.cols,
|
||||
level, block, patch);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void cv::gpu::PyrLKOpticalFlow::dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, GpuMat* err)
|
||||
{
|
||||
CV_Assert(prevImg.type() == CV_8UC1);
|
||||
CV_Assert(prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type());
|
||||
CV_Assert(maxLevel >= 0);
|
||||
CV_Assert(winSize.width > 2 && winSize.height > 2);
|
||||
|
||||
if (err)
|
||||
err->create(prevImg.size(), CV_32FC1);
|
||||
|
||||
// build the image pyramids.
|
||||
|
||||
prevPyr_.resize(maxLevel + 1);
|
||||
nextPyr_.resize(maxLevel + 1);
|
||||
|
||||
prevPyr_[0] = prevImg;
|
||||
nextImg.convertTo(nextPyr_[0], CV_32F);
|
||||
|
||||
for (int level = 1; level <= maxLevel; ++level)
|
||||
{
|
||||
pyrDown(prevPyr_[level - 1], prevPyr_[level]);
|
||||
pyrDown(nextPyr_[level - 1], nextPyr_[level]);
|
||||
}
|
||||
|
||||
ensureSizeIsEnough(prevImg.size(), CV_32FC1, uPyr_[0]);
|
||||
ensureSizeIsEnough(prevImg.size(), CV_32FC1, vPyr_[0]);
|
||||
ensureSizeIsEnough(prevImg.size(), CV_32FC1, uPyr_[1]);
|
||||
ensureSizeIsEnough(prevImg.size(), CV_32FC1, vPyr_[1]);
|
||||
uPyr_[0].setTo(Scalar::all(0));
|
||||
vPyr_[0].setTo(Scalar::all(0));
|
||||
uPyr_[1].setTo(Scalar::all(0));
|
||||
vPyr_[1].setTo(Scalar::all(0));
|
||||
|
||||
int2 winSize2i = make_int2(winSize.width, winSize.height);
|
||||
pyrlk::loadConstants(winSize2i, iters);
|
||||
|
||||
PtrStepSzf derr = err ? *err : PtrStepSzf();
|
||||
|
||||
int idx = 0;
|
||||
|
||||
for (int level = maxLevel; level >= 0; level--)
|
||||
{
|
||||
int idx2 = (idx + 1) & 1;
|
||||
|
||||
pyrlk::dense(prevPyr_[level], nextPyr_[level], uPyr_[idx], vPyr_[idx], uPyr_[idx2], vPyr_[idx2],
|
||||
level == 0 ? derr : PtrStepSzf(), winSize2i);
|
||||
|
||||
if (level > 0)
|
||||
idx = idx2;
|
||||
}
|
||||
|
||||
uPyr_[idx].copyTo(u);
|
||||
vPyr_[idx].copyTo(v);
|
||||
}
|
||||
|
||||
void cv::gpu::PyrLKOpticalFlow::releaseMemory()
|
||||
{
|
||||
prevPyr_.clear();
|
||||
nextPyr_.clear();
|
||||
|
||||
buf_.release();
|
||||
|
||||
uPyr_[0].release();
|
||||
vPyr_[0].release();
|
||||
|
||||
uPyr_[1].release();
|
||||
vPyr_[1].release();
|
||||
}
|
||||
|
||||
#endif /* !defined (HAVE_CUDA) */
|
258
modules/gpuoptflow/src/tvl1flow.cpp
Normal file
258
modules/gpuoptflow/src/tvl1flow.cpp
Normal file
@@ -0,0 +1,258 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
|
||||
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
|
||||
|
||||
cv::gpu::OpticalFlowDual_TVL1_GPU::OpticalFlowDual_TVL1_GPU() { throw_no_cuda(); }
|
||||
void cv::gpu::OpticalFlowDual_TVL1_GPU::operator ()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); }
|
||||
void cv::gpu::OpticalFlowDual_TVL1_GPU::collectGarbage() {}
|
||||
void cv::gpu::OpticalFlowDual_TVL1_GPU::procOneScale(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); }
|
||||
|
||||
#else
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::gpu;
|
||||
|
||||
// Installs the default solver parameters.
cv::gpu::OpticalFlowDual_TVL1_GPU::OpticalFlowDual_TVL1_GPU()
{
    // Numerical-scheme parameters (procOneScale uses lambda * theta and
    // tau / theta).
    tau    = 0.25;
    lambda = 0.15;
    theta  = 0.3;

    // Pyramid layout: number of scales and the resize factor between them.
    nscales   = 5;
    scaleStep = 0.8;

    // Iteration limits: warps per scale, inner iterations per warp, and the
    // stopping threshold for the inner loop.
    warps      = 5;
    iterations = 300;
    epsilon    = 0.01;

    useInitialFlow = false;
}
|
||||
|
||||
void cv::gpu::OpticalFlowDual_TVL1_GPU::operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy)
|
||||
{
|
||||
CV_Assert( I0.type() == CV_8UC1 || I0.type() == CV_32FC1 );
|
||||
CV_Assert( I0.size() == I1.size() );
|
||||
CV_Assert( I0.type() == I1.type() );
|
||||
CV_Assert( !useInitialFlow || (flowx.size() == I0.size() && flowx.type() == CV_32FC1 && flowy.size() == flowx.size() && flowy.type() == flowx.type()) );
|
||||
CV_Assert( nscales > 0 );
|
||||
|
||||
// allocate memory for the pyramid structure
|
||||
I0s.resize(nscales);
|
||||
I1s.resize(nscales);
|
||||
u1s.resize(nscales);
|
||||
u2s.resize(nscales);
|
||||
|
||||
I0.convertTo(I0s[0], CV_32F, I0.depth() == CV_8U ? 1.0 : 255.0);
|
||||
I1.convertTo(I1s[0], CV_32F, I1.depth() == CV_8U ? 1.0 : 255.0);
|
||||
|
||||
if (!useInitialFlow)
|
||||
{
|
||||
flowx.create(I0.size(), CV_32FC1);
|
||||
flowy.create(I0.size(), CV_32FC1);
|
||||
}
|
||||
|
||||
u1s[0] = flowx;
|
||||
u2s[0] = flowy;
|
||||
|
||||
I1x_buf.create(I0.size(), CV_32FC1);
|
||||
I1y_buf.create(I0.size(), CV_32FC1);
|
||||
|
||||
I1w_buf.create(I0.size(), CV_32FC1);
|
||||
I1wx_buf.create(I0.size(), CV_32FC1);
|
||||
I1wy_buf.create(I0.size(), CV_32FC1);
|
||||
|
||||
grad_buf.create(I0.size(), CV_32FC1);
|
||||
rho_c_buf.create(I0.size(), CV_32FC1);
|
||||
|
||||
p11_buf.create(I0.size(), CV_32FC1);
|
||||
p12_buf.create(I0.size(), CV_32FC1);
|
||||
p21_buf.create(I0.size(), CV_32FC1);
|
||||
p22_buf.create(I0.size(), CV_32FC1);
|
||||
|
||||
diff_buf.create(I0.size(), CV_32FC1);
|
||||
|
||||
// create the scales
|
||||
for (int s = 1; s < nscales; ++s)
|
||||
{
|
||||
gpu::resize(I0s[s-1], I0s[s], Size(), scaleStep, scaleStep);
|
||||
gpu::resize(I1s[s-1], I1s[s], Size(), scaleStep, scaleStep);
|
||||
|
||||
if (I0s[s].cols < 16 || I0s[s].rows < 16)
|
||||
{
|
||||
nscales = s;
|
||||
break;
|
||||
}
|
||||
|
||||
if (useInitialFlow)
|
||||
{
|
||||
gpu::resize(u1s[s-1], u1s[s], Size(), scaleStep, scaleStep);
|
||||
gpu::resize(u2s[s-1], u2s[s], Size(), scaleStep, scaleStep);
|
||||
|
||||
gpu::multiply(u1s[s], Scalar::all(scaleStep), u1s[s]);
|
||||
gpu::multiply(u2s[s], Scalar::all(scaleStep), u2s[s]);
|
||||
}
|
||||
else
|
||||
{
|
||||
u1s[s].create(I0s[s].size(), CV_32FC1);
|
||||
u2s[s].create(I0s[s].size(), CV_32FC1);
|
||||
}
|
||||
}
|
||||
|
||||
if (!useInitialFlow)
|
||||
{
|
||||
u1s[nscales-1].setTo(Scalar::all(0));
|
||||
u2s[nscales-1].setTo(Scalar::all(0));
|
||||
}
|
||||
|
||||
// pyramidal structure for computing the optical flow
|
||||
for (int s = nscales - 1; s >= 0; --s)
|
||||
{
|
||||
// compute the optical flow at the current scale
|
||||
procOneScale(I0s[s], I1s[s], u1s[s], u2s[s]);
|
||||
|
||||
// if this was the last scale, finish now
|
||||
if (s == 0)
|
||||
break;
|
||||
|
||||
// otherwise, upsample the optical flow
|
||||
|
||||
// zoom the optical flow for the next finer scale
|
||||
gpu::resize(u1s[s], u1s[s - 1], I0s[s - 1].size());
|
||||
gpu::resize(u2s[s], u2s[s - 1], I0s[s - 1].size());
|
||||
|
||||
// scale the optical flow with the appropriate zoom factor
|
||||
gpu::multiply(u1s[s - 1], Scalar::all(1/scaleStep), u1s[s - 1]);
|
||||
gpu::multiply(u2s[s - 1], Scalar::all(1/scaleStep), u2s[s - 1]);
|
||||
}
|
||||
}
|
||||
|
||||
namespace tvl1flow
|
||||
{
|
||||
void centeredGradient(PtrStepSzf src, PtrStepSzf dx, PtrStepSzf dy);
|
||||
void warpBackward(PtrStepSzf I0, PtrStepSzf I1, PtrStepSzf I1x, PtrStepSzf I1y, PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf I1w, PtrStepSzf I1wx, PtrStepSzf I1wy, PtrStepSzf grad, PtrStepSzf rho);
|
||||
void estimateU(PtrStepSzf I1wx, PtrStepSzf I1wy,
|
||||
PtrStepSzf grad, PtrStepSzf rho_c,
|
||||
PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22,
|
||||
PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf error,
|
||||
float l_t, float theta);
|
||||
void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, float taut);
|
||||
}
|
||||
|
||||
void cv::gpu::OpticalFlowDual_TVL1_GPU::procOneScale(const GpuMat& I0, const GpuMat& I1, GpuMat& u1, GpuMat& u2)
|
||||
{
|
||||
using namespace tvl1flow;
|
||||
|
||||
const double scaledEpsilon = epsilon * epsilon * I0.size().area();
|
||||
|
||||
CV_DbgAssert( I1.size() == I0.size() );
|
||||
CV_DbgAssert( I1.type() == I0.type() );
|
||||
CV_DbgAssert( u1.size() == I0.size() );
|
||||
CV_DbgAssert( u2.size() == u1.size() );
|
||||
|
||||
GpuMat I1x = I1x_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
GpuMat I1y = I1y_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
centeredGradient(I1, I1x, I1y);
|
||||
|
||||
GpuMat I1w = I1w_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
GpuMat I1wx = I1wx_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
GpuMat I1wy = I1wy_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
|
||||
GpuMat grad = grad_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
GpuMat rho_c = rho_c_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
|
||||
GpuMat p11 = p11_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
GpuMat p12 = p12_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
GpuMat p21 = p21_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
GpuMat p22 = p22_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
p11.setTo(Scalar::all(0));
|
||||
p12.setTo(Scalar::all(0));
|
||||
p21.setTo(Scalar::all(0));
|
||||
p22.setTo(Scalar::all(0));
|
||||
|
||||
GpuMat diff = diff_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
|
||||
const float l_t = static_cast<float>(lambda * theta);
|
||||
const float taut = static_cast<float>(tau / theta);
|
||||
|
||||
for (int warpings = 0; warpings < warps; ++warpings)
|
||||
{
|
||||
warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c);
|
||||
|
||||
double error = std::numeric_limits<double>::max();
|
||||
for (int n = 0; error > scaledEpsilon && n < iterations; ++n)
|
||||
{
|
||||
estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, diff, l_t, static_cast<float>(theta));
|
||||
|
||||
error = gpu::sum(diff, norm_buf)[0];
|
||||
|
||||
estimateDualVariables(u1, u2, p11, p12, p21, p22, taut);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void cv::gpu::OpticalFlowDual_TVL1_GPU::collectGarbage()
|
||||
{
|
||||
I0s.clear();
|
||||
I1s.clear();
|
||||
u1s.clear();
|
||||
u2s.clear();
|
||||
|
||||
I1x_buf.release();
|
||||
I1y_buf.release();
|
||||
|
||||
I1w_buf.release();
|
||||
I1wx_buf.release();
|
||||
I1wy_buf.release();
|
||||
|
||||
grad_buf.release();
|
||||
rho_c_buf.release();
|
||||
|
||||
p11_buf.release();
|
||||
p12_buf.release();
|
||||
p21_buf.release();
|
||||
p22_buf.release();
|
||||
|
||||
diff_buf.release();
|
||||
norm_buf.release();
|
||||
}
|
||||
|
||||
#endif // !defined HAVE_CUDA || defined(CUDA_DISABLER)
|
45
modules/gpuoptflow/test/test_main.cpp
Normal file
45
modules/gpuoptflow/test/test_main.cpp
Normal file
@@ -0,0 +1,45 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "test_precomp.hpp"
|
||||
|
||||
// Entry point of the test executable, generated by the OpenCV test-support
// macro. NOTE(review): "gpu" is presumably the test-data sub-directory, which
// would mean this module still reads its data from the gpu folder -- confirm.
CV_GPU_TEST_MAIN("gpu")
|
548
modules/gpuoptflow/test/test_optflow.cpp
Normal file
548
modules/gpuoptflow/test/test_optflow.cpp
Normal file
@@ -0,0 +1,548 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "test_precomp.hpp"
|
||||
#include "opencv2/legacy.hpp"
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
|
||||
using namespace cvtest;
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// BroxOpticalFlow
|
||||
|
||||
//#define BROX_DUMP
|
||||
|
||||
// Fixture for the Brox optical flow tests, parameterised by the CUDA device
// to run on. SetUp() makes that device current before each test.
struct BroxOpticalFlow : testing::TestWithParam<cv::gpu::DeviceInfo>
{
    cv::gpu::DeviceInfo devInfo;

    virtual void SetUp()
    {
        devInfo = GetParam();
        cv::gpu::setDevice(devInfo.deviceID());
    }
};
|
||||
|
||||
// Compares the Brox flow of the two reference frames against a stored gold
// result (exact match, tolerance 0). Devices with compute capability >= 2.x
// use a separate gold file. Building with BROX_DUMP defined regenerates the
// gold file instead of checking against it.
//
// Gold file layout: int rows, int cols, then rows x cols floats of u followed
// by rows x cols floats of v.
GPU_TEST_P(BroxOpticalFlow, Regression)
{
    cv::Mat frame0 = readImageType("opticalflow/frame0.png", CV_32FC1);
    ASSERT_FALSE(frame0.empty());

    cv::Mat frame1 = readImageType("opticalflow/frame1.png", CV_32FC1);
    ASSERT_FALSE(frame1.empty());

    cv::gpu::BroxOpticalFlow brox(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/,
                                  10 /*inner_iterations*/, 77 /*outer_iterations*/, 10 /*solver_iterations*/);

    cv::gpu::GpuMat u;
    cv::gpu::GpuMat v;
    brox(loadMat(frame0), loadMat(frame1), u, v);

    std::string fname(cvtest::TS::ptr()->get_data_path());
    if (devInfo.majorVersion() >= 2)
        fname += "opticalflow/brox_optical_flow_cc20.bin";
    else
        fname += "opticalflow/brox_optical_flow.bin";

#ifndef BROX_DUMP
    std::ifstream f(fname.c_str(), std::ios_base::binary);
    // Fail clearly when the gold file is missing instead of reading garbage.
    ASSERT_TRUE(f.is_open()) << "Can't open gold file " << fname;

    // Zero-initialised so that a short read never yields indeterminate sizes.
    int rows = 0, cols = 0;

    f.read((char*) &rows, sizeof(rows));
    f.read((char*) &cols, sizeof(cols));
    ASSERT_TRUE(f.good()) << "Can't read the header of " << fname;
    ASSERT_GT(rows, 0);
    ASSERT_GT(cols, 0);

    cv::Mat u_gold(rows, cols, CV_32FC1);

    for (int i = 0; i < u_gold.rows; ++i)
        f.read(u_gold.ptr<char>(i), u_gold.cols * sizeof(float));

    cv::Mat v_gold(rows, cols, CV_32FC1);

    for (int i = 0; i < v_gold.rows; ++i)
        f.read(v_gold.ptr<char>(i), v_gold.cols * sizeof(float));

    // A truncated file would otherwise leave part of the gold data unset.
    ASSERT_TRUE(f.good()) << "Gold file " << fname << " is truncated";

    EXPECT_MAT_NEAR(u_gold, u, 0);
    EXPECT_MAT_NEAR(v_gold, v, 0);
#else
    std::ofstream f(fname.c_str(), std::ios_base::binary);
    ASSERT_TRUE(f.is_open()) << "Can't create gold file " << fname;

    f.write((char*) &u.rows, sizeof(u.rows));
    f.write((char*) &u.cols, sizeof(u.cols));

    cv::Mat h_u(u);
    cv::Mat h_v(v);

    for (int i = 0; i < u.rows; ++i)
        f.write(h_u.ptr<char>(i), u.cols * sizeof(float));

    for (int i = 0; i < v.rows; ++i)
        f.write(h_v.ptr<char>(i), v.cols * sizeof(float));
#endif
}
|
||||
|
||||
// Checks that Brox optical flow on frames resized to 1380x1000 produces flow
// components that pass cv::checkRange.
GPU_TEST_P(BroxOpticalFlow, OpticalFlowNan)
{
    cv::Mat frame0 = readImageType("opticalflow/frame0.png", CV_32FC1);
    ASSERT_FALSE(frame0.empty());

    cv::Mat frame1 = readImageType("opticalflow/frame1.png", CV_32FC1);
    ASSERT_FALSE(frame1.empty());

    cv::Mat r_frame0, r_frame1;
    cv::resize(frame0, r_frame0, cv::Size(1380,1000));
    cv::resize(frame1, r_frame1, cv::Size(1380,1000));

    cv::gpu::BroxOpticalFlow brox(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/,
                                  5 /*inner_iterations*/, 150 /*outer_iterations*/, 10 /*solver_iterations*/);

    cv::gpu::GpuMat u;
    cv::gpu::GpuMat v;
    brox(loadMat(r_frame0), loadMat(r_frame1), u, v);

    // Bring the results back to the host so they can be range-checked.
    cv::Mat h_u, h_v;
    u.download(h_u);
    v.download(h_v);

    EXPECT_TRUE(cv::checkRange(h_u));
    EXPECT_TRUE(cv::checkRange(h_v));
}
|
||||
|
||||
// Instantiate the BroxOpticalFlow tests under the GPU_Video prefix, parameterised over ALL_DEVICES.
INSTANTIATE_TEST_CASE_P(GPU_Video, BroxOpticalFlow, ALL_DEVICES);
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// PyrLKOpticalFlow
|
||||
|
||||
namespace
|
||||
{
|
||||
IMPLEMENT_PARAM_CLASS(UseGray, bool)
|
||||
}
|
||||
|
||||
// Fixture for the pyramidal Lucas-Kanade tests.
// Parameters: (0) the GPU device to run on, (1) whether frames are loaded as grayscale.
PARAM_TEST_CASE(PyrLKOpticalFlow, cv::gpu::DeviceInfo, UseGray)
{
    cv::gpu::DeviceInfo devInfo;
    bool useGray;

    // Unpack the test parameters, then select the requested device.
    virtual void SetUp()
    {
        devInfo = GET_PARAM(0);
        useGray = GET_PARAM(1);

        cv::gpu::setDevice(devInfo.deviceID());
    }
};
|
||||
|
||||
// Sparse pyramidal Lucas-Kanade test.
//
// Detects up to 1000 corners in frame0, tracks them into frame1 with
// cv::gpu::PyrLKOpticalFlow::sparse and with cv::calcOpticalFlowPyrLK, and
// requires that at most 1% of the points disagree. A point disagrees when the
// status flags differ, or when both report success but the integer-converted
// positions differ by more than one pixel on either axis.
GPU_TEST_P(PyrLKOpticalFlow, Sparse)
{
    cv::Mat frame0 = readImage("opticalflow/frame0.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
    ASSERT_FALSE(frame0.empty());

    cv::Mat frame1 = readImage("opticalflow/frame1.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
    ASSERT_FALSE(frame1.empty());

    // Corner detection is always run on a grayscale version of frame0.
    cv::Mat gray_frame;
    if (useGray)
        gray_frame = frame0;
    else
        cv::cvtColor(frame0, gray_frame, cv::COLOR_BGR2GRAY);

    std::vector<cv::Point2f> pts;
    cv::goodFeaturesToTrack(gray_frame, pts, 1000, 0.01, 0.0);

    // &pts[0] below and the final ratio both need at least one point.
    ASSERT_FALSE(pts.empty());

    cv::gpu::GpuMat d_pts;
    cv::Mat pts_mat(1, (int) pts.size(), CV_32FC2, (void*) &pts[0]);
    d_pts.upload(pts_mat);

    cv::gpu::PyrLKOpticalFlow pyrLK;

    cv::gpu::GpuMat d_nextPts;
    cv::gpu::GpuMat d_status;
    pyrLK.sparse(loadMat(frame0), loadMat(frame1), d_pts, d_nextPts, d_status);

    // Validate the output lengths before wrapping vector storage in cv::Mat
    // headers, so &vec[0] is never taken on an empty vector.
    ASSERT_EQ((int) pts.size(), d_nextPts.cols);
    ASSERT_EQ((int) pts.size(), d_status.cols);

    std::vector<cv::Point2f> nextPts(d_nextPts.cols);
    cv::Mat nextPts_mat(1, d_nextPts.cols, CV_32FC2, (void*) &nextPts[0]);
    d_nextPts.download(nextPts_mat);

    std::vector<unsigned char> status(d_status.cols);
    cv::Mat status_mat(1, d_status.cols, CV_8UC1, (void*) &status[0]);
    d_status.download(status_mat);

    std::vector<cv::Point2f> nextPts_gold;
    std::vector<unsigned char> status_gold;
    cv::calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts_gold, status_gold, cv::noArray());

    ASSERT_EQ(nextPts_gold.size(), nextPts.size());
    ASSERT_EQ(status_gold.size(), status.size());

    size_t mismatch = 0;
    for (size_t i = 0; i < nextPts.size(); ++i)
    {
        cv::Point2i a = nextPts[i];
        cv::Point2i b = nextPts_gold[i];

        if (status[i] != status_gold[i])
        {
            ++mismatch;
            continue;
        }

        // Positions are only compared for points both sides report as tracked.
        if (status[i])
        {
            bool eq = std::abs(a.x - b.x) <= 1 && std::abs(a.y - b.y) <= 1;

            if (!eq)
                ++mismatch;
        }
    }

    double bad_ratio = static_cast<double>(mismatch) / nextPts.size();

    ASSERT_LE(bad_ratio, 0.01);
}
|
||||
|
||||
// Instantiate the PyrLK tests for each entry of ALL_DEVICES, with both grayscale and colour input.
INSTANTIATE_TEST_CASE_P(GPU_Video, PyrLKOpticalFlow, testing::Combine(
    ALL_DEVICES,
    testing::Values(UseGray(true), UseGray(false))));
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// FarnebackOpticalFlow
|
||||
|
||||
namespace
|
||||
{
|
||||
IMPLEMENT_PARAM_CLASS(PyrScale, double)
|
||||
IMPLEMENT_PARAM_CLASS(PolyN, int)
|
||||
CV_FLAGS(FarnebackOptFlowFlags, 0, OPTFLOW_FARNEBACK_GAUSSIAN)
|
||||
IMPLEMENT_PARAM_CLASS(UseInitFlow, bool)
|
||||
}
|
||||
|
||||
// Fixture for the Farneback optical flow tests.
// Parameters: (0) GPU device, (1) pyramid scale, (2) polyN, (3) algorithm flags,
// (4) whether a second pass seeded with an initial flow is run.
PARAM_TEST_CASE(FarnebackOpticalFlow, cv::gpu::DeviceInfo, PyrScale, PolyN, FarnebackOptFlowFlags, UseInitFlow)
{
    cv::gpu::DeviceInfo devInfo;
    double pyrScale;
    int polyN;
    int flags;
    bool useInitFlow;

    // Unpack the test parameters, then select the requested device.
    virtual void SetUp()
    {
        devInfo = GET_PARAM(0);
        pyrScale = GET_PARAM(1);
        polyN = GET_PARAM(2);
        flags = GET_PARAM(3);
        useInitFlow = GET_PARAM(4);

        cv::gpu::setDevice(devInfo.deviceID());
    }
};
|
||||
|
||||
// Compares cv::gpu::FarnebackOpticalFlow against cv::calcOpticalFlowFarneback
// run with the same settings. When useInitFlow is set, the first GPU result
// seeds both a second GPU pass and the CPU reference through
// OPTFLOW_USE_INITIAL_FLOW.
GPU_TEST_P(FarnebackOpticalFlow, Accuracy)
{
    cv::Mat frame0 = readImage("opticalflow/rubberwhale1.png", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(frame0.empty());

    cv::Mat frame1 = readImage("opticalflow/rubberwhale2.png", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(frame1.empty());

    cv::gpu::FarnebackOpticalFlow farn;
    farn.pyrScale = pyrScale;
    farn.polyN = polyN;
    // polySigma is paired with polyN: 1.1 for polyN <= 5, otherwise 1.5.
    if (polyN <= 5)
        farn.polySigma = 1.1;
    else
        farn.polySigma = 1.5;
    farn.flags = flags;

    cv::gpu::GpuMat d_flowx, d_flowy;
    farn(loadMat(frame0), loadMat(frame1), d_flowx, d_flowy);

    cv::Mat flow;
    if (useInitFlow)
    {
        // Pack the first GPU result into a two-channel flow for the CPU call.
        cv::Mat planes[] = {cv::Mat(d_flowx), cv::Mat(d_flowy)};
        cv::merge(planes, 2, flow);

        farn.flags |= cv::OPTFLOW_USE_INITIAL_FLOW;
        farn(loadMat(frame0), loadMat(frame1), d_flowx, d_flowy);
    }

    cv::calcOpticalFlowFarneback(
        frame0, frame1, flow,
        farn.pyrScale, farn.numLevels, farn.winSize,
        farn.numIters, farn.polyN, farn.polySigma, farn.flags);

    std::vector<cv::Mat> flowxy;
    cv::split(flow, flowxy);

    EXPECT_MAT_SIMILAR(flowxy[0], d_flowx, 0.1);
    EXPECT_MAT_SIMILAR(flowxy[1], d_flowy, 0.1);
}
|
||||
|
||||
// Instantiate the Farneback tests over ALL_DEVICES x three pyramid scales x
// two polyN values x two flag settings x with/without initial flow.
INSTANTIATE_TEST_CASE_P(GPU_Video, FarnebackOpticalFlow, testing::Combine(
    ALL_DEVICES,
    testing::Values(PyrScale(0.3), PyrScale(0.5), PyrScale(0.8)),
    testing::Values(PolyN(5), PolyN(7)),
    testing::Values(FarnebackOptFlowFlags(0), FarnebackOptFlowFlags(cv::OPTFLOW_FARNEBACK_GAUSSIAN)),
    testing::Values(UseInitFlow(false), UseInitFlow(true))));
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// OpticalFlowDual_TVL1
|
||||
|
||||
// Fixture for the Dual TV-L1 optical flow tests.
// Parameters: (0) GPU device, (1) the useRoi flag passed to createMat/loadMat.
PARAM_TEST_CASE(OpticalFlowDual_TVL1, cv::gpu::DeviceInfo, UseRoi)
{
    cv::gpu::DeviceInfo devInfo;
    bool useRoi;

    // Unpack the test parameters, then select the requested device.
    virtual void SetUp()
    {
        devInfo = GET_PARAM(0);
        useRoi = GET_PARAM(1);

        cv::gpu::setDevice(devInfo.deviceID());
    }
};
|
||||
|
||||
// Compares cv::gpu::OpticalFlowDual_TVL1_GPU with the CPU implementation from
// cv::createOptFlow_DualTVL1. The CPU algorithm gets medianFiltering = 1,
// innerIterations = 1 and outerIterations = the GPU object's iteration count.
GPU_TEST_P(OpticalFlowDual_TVL1, Accuracy)
{
    cv::Mat frame0 = readImage("opticalflow/rubberwhale1.png", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(frame0.empty());

    cv::Mat frame1 = readImage("opticalflow/rubberwhale2.png", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(frame1.empty());

    // GPU result.
    cv::gpu::OpticalFlowDual_TVL1_GPU d_alg;
    cv::gpu::GpuMat d_flowx = createMat(frame0.size(), CV_32FC1, useRoi);
    cv::gpu::GpuMat d_flowy = createMat(frame0.size(), CV_32FC1, useRoi);
    d_alg(loadMat(frame0, useRoi), loadMat(frame1, useRoi), d_flowx, d_flowy);

    // CPU reference.
    cv::Ptr<cv::DenseOpticalFlow> alg = cv::createOptFlow_DualTVL1();
    alg->set("medianFiltering", 1);
    alg->set("innerIterations", 1);
    alg->set("outerIterations", d_alg.iterations);

    cv::Mat flow;
    alg->calc(frame0, frame1, flow);

    cv::Mat gold[2];
    cv::split(flow, gold);

    EXPECT_MAT_SIMILAR(gold[0], d_flowx, 4e-3);
    EXPECT_MAT_SIMILAR(gold[1], d_flowy, 4e-3);
}
|
||||
|
||||
// Instantiate the Dual TV-L1 tests over ALL_DEVICES combined with WHOLE_SUBMAT.
INSTANTIATE_TEST_CASE_P(GPU_Video, OpticalFlowDual_TVL1, testing::Combine(
    ALL_DEVICES,
    WHOLE_SUBMAT));
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// OpticalFlowBM
|
||||
|
||||
namespace
|
||||
{
|
||||
// CPU reference for block-matching optical flow: a cv::Mat front end to the
// C function cvCalcOpticalFlowBM.
//
// velx and vely are (re)allocated as CV_32FC1 with size
//   ((curr.cols - bSize.width  + shiftSize.width)  / shiftSize.width,
//    (curr.rows - bSize.height + shiftSize.height) / shiftSize.height)
// before the call; the remaining arguments are forwarded unchanged.
void calcOpticalFlowBM(const cv::Mat& prev, const cv::Mat& curr,
                       cv::Size bSize, cv::Size shiftSize, cv::Size maxRange, int usePrevious,
                       cv::Mat& velx, cv::Mat& vely)
{
    const int velWidth  = (curr.cols - bSize.width  + shiftSize.width)  / shiftSize.width;
    const int velHeight = (curr.rows - bSize.height + shiftSize.height) / shiftSize.height;
    const cv::Size velSize(velWidth, velHeight);

    velx.create(velSize, CV_32FC1);
    vely.create(velSize, CV_32FC1);

    // CvMat headers for the inputs and the freshly allocated outputs.
    CvMat c_prev = prev;
    CvMat c_curr = curr;
    CvMat c_velx = velx;
    CvMat c_vely = vely;

    cvCalcOpticalFlowBM(&c_prev, &c_curr, bSize, shiftSize, maxRange, usePrevious, &c_velx, &c_vely);
}
|
||||
}
|
||||
|
||||
// Fixture for the block-matching tests; parameterised by GPU device only, with no state of its own.
struct OpticalFlowBM : testing::TestWithParam<cv::gpu::DeviceInfo>
{
};
|
||||
|
||||
// Compares cv::gpu::calcOpticalFlowBM with the CPU calcOpticalFlowBM helper
// defined in this file, requiring both velocity components to match with
// zero tolerance.
GPU_TEST_P(OpticalFlowBM, Accuracy)
{
    cv::gpu::DeviceInfo devInfo = GetParam();
    cv::gpu::setDevice(devInfo.deviceID());

    cv::Mat frame0 = readImage("opticalflow/rubberwhale1.png", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(frame0.empty());

    cv::Mat frame1 = readImage("opticalflow/rubberwhale2.png", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(frame1.empty());

    // Settings shared by the GPU call and the CPU reference.
    cv::Size block_size(16, 16);
    cv::Size shift_size(1, 1);
    cv::Size max_range(16, 16);

    // GPU result.
    cv::gpu::GpuMat d_velx, d_vely, buf;
    cv::gpu::calcOpticalFlowBM(loadMat(frame0), loadMat(frame1),
                               block_size, shift_size, max_range, false,
                               d_velx, d_vely, buf);

    // CPU reference.
    cv::Mat velx, vely;
    calcOpticalFlowBM(frame0, frame1, block_size, shift_size, max_range, false, velx, vely);

    EXPECT_MAT_NEAR(velx, d_velx, 0);
    EXPECT_MAT_NEAR(vely, d_vely, 0);
}
|
||||
|
||||
// Instantiate the OpticalFlowBM tests under the GPU_Video prefix, parameterised over ALL_DEVICES.
INSTANTIATE_TEST_CASE_P(GPU_Video, OpticalFlowBM, ALL_DEVICES);
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// FastOpticalFlowBM
|
||||
|
||||
namespace
|
||||
{
|
||||
// Brute-force CPU reference for the fast block-matching optical flow.
//
// For every pixel of I0, all displacements (dx, dy) within
// +/- search_window / 2 are tried. Each is scored by the sum of absolute
// differences between the block of radius block_window / 2 around the pixel
// in I0 and the displaced block in I1. Out-of-range coordinates are mapped
// with cv::borderInterpolate(..., cv::BORDER_DEFAULT), using I0's dimensions
// for both images. The first displacement (dy outer, dx inner) with the
// strictly smallest score is written to velx / vely, which are allocated
// with I0's size.
void FastOpticalFlowBM_gold(const cv::Mat_<uchar>& I0, const cv::Mat_<uchar>& I1, cv::Mat_<float>& velx, cv::Mat_<float>& vely, int search_window, int block_window)
{
    velx.create(I0.size());
    vely.create(I0.size());

    const int searchR = search_window / 2;
    const int blockR = block_window / 2;

    for (int row = 0; row < I0.rows; ++row)
    {
        for (int col = 0; col < I0.cols; ++col)
        {
            int minSad = std::numeric_limits<int>::max();
            int minDx = 0;
            int minDy = 0;

            for (int dy = -searchR; dy <= searchR; ++dy)
            {
                for (int dx = -searchR; dx <= searchR; ++dx)
                {
                    int sad = 0;

                    for (int by = -blockR; by <= blockR; ++by)
                    {
                        for (int bx = -blockR; bx <= blockR; ++bx)
                        {
                            const int r0 = cv::borderInterpolate(row + by, I0.rows, cv::BORDER_DEFAULT);
                            const int c0 = cv::borderInterpolate(col + bx, I0.cols, cv::BORDER_DEFAULT);
                            const int r1 = cv::borderInterpolate(row + dy + by, I0.rows, cv::BORDER_DEFAULT);
                            const int c1 = cv::borderInterpolate(col + dx + bx, I0.cols, cv::BORDER_DEFAULT);

                            const int p0 = I0(r0, c0);
                            const int p1 = I1(r1, c1);

                            sad += std::abs(p0 - p1);
                        }
                    }

                    // Strict comparison: ties keep the earliest candidate.
                    if (sad < minSad)
                    {
                        minSad = sad;
                        minDx = dx;
                        minDy = dy;
                    }
                }
            }

            velx(row, col) = (float) minDx;
            vely(row, col) = (float) minDy;
        }
    }
}
|
||||
|
||||
// Root-mean-square error between two single-channel float flow fields.
//
// Returns sqrt(sum((flow1 - flow2)^2) / number of elements of flow1).
// Both matrices must have the same size; this is asserted because the loop
// indexes flow2 with flow1's dimensions and would otherwise read out of bounds.
double calc_rmse(const cv::Mat_<float>& flow1, const cv::Mat_<float>& flow2)
{
    CV_Assert(flow1.size() == flow2.size());

    double sum = 0.0;

    for (int y = 0; y < flow1.rows; ++y)
    {
        for (int x = 0; x < flow1.cols; ++x)
        {
            double diff = flow1(y, x) - flow2(y, x);
            sum += diff * diff;
        }
    }

    return std::sqrt(sum / flow1.size().area());
}
|
||||
}
|
||||
|
||||
// Fixture for the fast block-matching tests; parameterised by GPU device only, with no state of its own.
struct FastOpticalFlowBM : testing::TestWithParam<cv::gpu::DeviceInfo>
{
};
|
||||
|
||||
// Compares cv::gpu::FastOpticalFlowBM with the brute-force CPU reference
// FastOpticalFlowBM_gold on frames resized to 320x240. The RMSE of each flow
// component must not exceed MAX_RMSE.
GPU_TEST_P(FastOpticalFlowBM, Accuracy)
{
    const double MAX_RMSE = 0.6;

    int search_window = 15;
    int block_window = 5;

    cv::gpu::DeviceInfo devInfo = GetParam();
    cv::gpu::setDevice(devInfo.deviceID());

    cv::Mat frame0 = readImage("opticalflow/rubberwhale1.png", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(frame0.empty());

    cv::Mat frame1 = readImage("opticalflow/rubberwhale2.png", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(frame1.empty());

    // Both implementations run on resized copies of the frames.
    cv::Size smallSize(320, 240);
    cv::Mat frame0_small, frame1_small;
    cv::resize(frame0, frame0_small, smallSize);
    cv::resize(frame1, frame1_small, smallSize);

    // GPU result.
    cv::gpu::GpuMat d_flowx, d_flowy;
    cv::gpu::FastOpticalFlowBM fastBM;
    fastBM(loadMat(frame0_small), loadMat(frame1_small), d_flowx, d_flowy, search_window, block_window);

    // CPU reference.
    cv::Mat_<float> flowx, flowy;
    FastOpticalFlowBM_gold(frame0_small, frame1_small, flowx, flowy, search_window, block_window);

    double err;

    err = calc_rmse(flowx, cv::Mat(d_flowx));
    EXPECT_LE(err, MAX_RMSE);

    err = calc_rmse(flowy, cv::Mat(d_flowy));
    EXPECT_LE(err, MAX_RMSE);
}
|
||||
|
||||
// Instantiate the FastOpticalFlowBM tests under the GPU_Video prefix, parameterised over ALL_DEVICES.
INSTANTIATE_TEST_CASE_P(GPU_Video, FastOpticalFlowBM, ALL_DEVICES);
|
||||
|
||||
#endif // HAVE_CUDA
|
43
modules/gpuoptflow/test/test_precomp.cpp
Normal file
43
modules/gpuoptflow/test/test_precomp.cpp
Normal file
@@ -0,0 +1,43 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "test_precomp.hpp"
|
64
modules/gpuoptflow/test/test_precomp.hpp
Normal file
64
modules/gpuoptflow/test/test_precomp.hpp
Normal file
@@ -0,0 +1,64 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifdef __GNUC__
|
||||
# pragma GCC diagnostic ignored "-Wmissing-declarations"
|
||||
# if defined __clang__ || defined __APPLE__
|
||||
# pragma GCC diagnostic ignored "-Wmissing-prototypes"
|
||||
# pragma GCC diagnostic ignored "-Wextra"
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef __OPENCV_TEST_PRECOMP_HPP__
|
||||
#define __OPENCV_TEST_PRECOMP_HPP__
|
||||
|
||||
#include <fstream>
|
||||
|
||||
#include "opencv2/ts.hpp"
|
||||
#include "opencv2/ts/gpu_test.hpp"
|
||||
|
||||
#include "opencv2/gpuoptflow.hpp"
|
||||
#include "opencv2/gpuimgproc.hpp"
|
||||
#include "opencv2/video.hpp"
|
||||
#include "opencv2/legacy.hpp"
|
||||
|
||||
#endif
|
Reference in New Issue
Block a user