Added new performance tests for calcOpticalFlowPyrLK and buildOpticalFlowPyramid; extracted private header from lkpyramid.cpp
This commit is contained in:
parent
ac8f61ee91
commit
47f72b538f
@ -28,12 +28,12 @@ void FormTrackingPointsArray(vector<Point2f>& points, int width, int height, int
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
PERF_TEST_P(Path_Idx_Cn_NPoints_WSize, OpticalFlowPyrLK, testing::Combine(
|
PERF_TEST_P(Path_Idx_Cn_NPoints_WSize, OpticalFlowPyrLK_full, testing::Combine(
|
||||||
testing::Values<std::string>("cv/optflow/frames/VGA_%02d.png", "cv/optflow/frames/720p_%02d.jpg"),
|
testing::Values<std::string>("cv/optflow/frames/VGA_%02d.png", "cv/optflow/frames/720p_%02d.jpg"),
|
||||||
testing::Range(0, 3),
|
testing::Range(1, 3),
|
||||||
testing::Values(1, 3, 4),
|
testing::Values(1, 3, 4),
|
||||||
testing::Values(make_tuple(9, 9), make_tuple(15, 15)),
|
testing::Values(make_tuple(9, 9), make_tuple(15, 15)),
|
||||||
testing::Values(7, 11, 21, 25)
|
testing::Values(7, 11, 25)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
@ -48,6 +48,7 @@ PERF_TEST_P(Path_Idx_Cn_NPoints_WSize, OpticalFlowPyrLK, testing::Combine(
|
|||||||
int nPointsX = min(get<0>(get<3>(GetParam())), img1.cols);
|
int nPointsX = min(get<0>(get<3>(GetParam())), img1.cols);
|
||||||
int nPointsY = min(get<1>(get<3>(GetParam())), img1.rows);
|
int nPointsY = min(get<1>(get<3>(GetParam())), img1.rows);
|
||||||
int winSize = get<4>(GetParam());
|
int winSize = get<4>(GetParam());
|
||||||
|
|
||||||
int maxLevel = 2;
|
int maxLevel = 2;
|
||||||
TermCriteria criteria(CV_TERMCRIT_ITER|CV_TERMCRIT_EPS, 7, 0.001);
|
TermCriteria criteria(CV_TERMCRIT_ITER|CV_TERMCRIT_EPS, 7, 0.001);
|
||||||
int flags = 0;
|
int flags = 0;
|
||||||
@ -91,3 +92,120 @@ PERF_TEST_P(Path_Idx_Cn_NPoints_WSize, OpticalFlowPyrLK, testing::Combine(
|
|||||||
flags, minEigThreshold);
|
flags, minEigThreshold);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
typedef tr1::tuple<std::string, int, int, tr1::tuple<int,int>, int, bool> Path_Idx_Cn_NPoints_WSize_Deriv_t;
|
||||||
|
typedef TestBaseWithParam<Path_Idx_Cn_NPoints_WSize_Deriv_t> Path_Idx_Cn_NPoints_WSize_Deriv;
|
||||||
|
|
||||||
|
PERF_TEST_P(Path_Idx_Cn_NPoints_WSize_Deriv, OpticalFlowPyrLK_self, testing::Combine(
|
||||||
|
testing::Values<std::string>("cv/optflow/frames/VGA_%02d.png", "cv/optflow/frames/720p_%02d.jpg"),
|
||||||
|
testing::Range(1, 3),
|
||||||
|
testing::Values(1, 3, 4),
|
||||||
|
testing::Values(make_tuple(9, 9), make_tuple(15, 15)),
|
||||||
|
testing::Values(7, 11, 25),
|
||||||
|
testing::Bool()
|
||||||
|
)
|
||||||
|
)
|
||||||
|
{
|
||||||
|
string filename1 = getDataPath(cv::format(get<0>(GetParam()).c_str(), get<1>(GetParam())));
|
||||||
|
string filename2 = getDataPath(cv::format(get<0>(GetParam()).c_str(), get<1>(GetParam()) + 1));
|
||||||
|
Mat img1 = imread(filename1);
|
||||||
|
Mat img2 = imread(filename2);
|
||||||
|
if (img1.empty()) FAIL() << "Unable to load source image " << filename1;
|
||||||
|
if (img2.empty()) FAIL() << "Unable to load source image " << filename2;
|
||||||
|
|
||||||
|
int cn = get<2>(GetParam());
|
||||||
|
int nPointsX = min(get<0>(get<3>(GetParam())), img1.cols);
|
||||||
|
int nPointsY = min(get<1>(get<3>(GetParam())), img1.rows);
|
||||||
|
int winSize = get<4>(GetParam());
|
||||||
|
bool withDerivatives = get<5>(GetParam());
|
||||||
|
|
||||||
|
int maxLevel = 2;
|
||||||
|
TermCriteria criteria(CV_TERMCRIT_ITER|CV_TERMCRIT_EPS, 7, 0.001);
|
||||||
|
int flags = 0;
|
||||||
|
double minEigThreshold = 1e-4;
|
||||||
|
|
||||||
|
Mat frame1, frame2;
|
||||||
|
switch(cn)
|
||||||
|
{
|
||||||
|
case 1:
|
||||||
|
cvtColor(img1, frame1, COLOR_BGR2GRAY, cn);
|
||||||
|
cvtColor(img2, frame2, COLOR_BGR2GRAY, cn);
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
frame1 = img1;
|
||||||
|
frame2 = img2;
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
cvtColor(img1, frame1, COLOR_BGR2BGRA, cn);
|
||||||
|
cvtColor(img2, frame2, COLOR_BGR2BGRA, cn);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
FAIL() << "Unexpected number of channels: " << cn;
|
||||||
|
}
|
||||||
|
|
||||||
|
vector<Point2f> inPoints;
|
||||||
|
vector<Point2f> outPoints;
|
||||||
|
vector<uchar> status;
|
||||||
|
vector<float> err;
|
||||||
|
|
||||||
|
FormTrackingPointsArray(inPoints, frame1.cols, frame1.rows, nPointsX, nPointsY);
|
||||||
|
outPoints.resize(inPoints.size());
|
||||||
|
status.resize(inPoints.size());
|
||||||
|
err.resize(inPoints.size());
|
||||||
|
|
||||||
|
std::vector<Mat> pyramid1, pyramid2;
|
||||||
|
|
||||||
|
maxLevel = buildOpticalFlowPyramid(frame1, pyramid1, Size(winSize, winSize), maxLevel, withDerivatives);
|
||||||
|
maxLevel = buildOpticalFlowPyramid(frame2, pyramid2, Size(winSize, winSize), maxLevel, withDerivatives);
|
||||||
|
|
||||||
|
declare.in(pyramid1, pyramid2, inPoints).out(outPoints);
|
||||||
|
|
||||||
|
TEST_CYCLE()
|
||||||
|
{
|
||||||
|
calcOpticalFlowPyrLK(pyramid1, pyramid2, inPoints, outPoints, status, err,
|
||||||
|
Size(winSize, winSize), maxLevel, criteria,
|
||||||
|
flags, minEigThreshold);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_ENUM(PyrBorderMode, BORDER_DEFAULT, BORDER_TRANSPARENT);
|
||||||
|
typedef tr1::tuple<std::string, int, bool, PyrBorderMode, bool> Path_Win_Deriv_Border_Reuse_t;
|
||||||
|
typedef TestBaseWithParam<Path_Win_Deriv_Border_Reuse_t> Path_Win_Deriv_Border_Reuse;
|
||||||
|
|
||||||
|
PERF_TEST_P(Path_Win_Deriv_Border_Reuse, OpticalFlowPyrLK_pyr, testing::Combine(
|
||||||
|
testing::Values<std::string>("cv/optflow/frames/720p_01.jpg"),
|
||||||
|
testing::Values(7, 11),
|
||||||
|
testing::Bool(),
|
||||||
|
testing::ValuesIn(PyrBorderMode::all()),
|
||||||
|
testing::Bool()
|
||||||
|
)
|
||||||
|
)
|
||||||
|
{
|
||||||
|
string filename = getDataPath(get<0>(GetParam()));
|
||||||
|
Mat img = imread(filename);
|
||||||
|
Size winSize(get<1>(GetParam()), get<1>(GetParam()));
|
||||||
|
bool withDerivatives = get<2>(GetParam());
|
||||||
|
int derivBorder = get<3>(GetParam());
|
||||||
|
int pyrBorder = derivBorder;
|
||||||
|
if(derivBorder != BORDER_TRANSPARENT)
|
||||||
|
{
|
||||||
|
derivBorder = BORDER_CONSTANT;
|
||||||
|
pyrBorder = BORDER_REFLECT_101;
|
||||||
|
}
|
||||||
|
bool tryReuseInputImage = get<4>(GetParam());
|
||||||
|
std::vector<Mat> pyramid;
|
||||||
|
|
||||||
|
img.adjustROI(winSize.height, winSize.height, winSize.width, winSize.width);
|
||||||
|
|
||||||
|
int maxLevel = buildOpticalFlowPyramid(img, pyramid, winSize, 1000, withDerivatives, BORDER_CONSTANT, BORDER_CONSTANT, tryReuseInputImage);
|
||||||
|
|
||||||
|
declare.in(img).out(pyramid);
|
||||||
|
|
||||||
|
|
||||||
|
TEST_CYCLE()
|
||||||
|
{
|
||||||
|
buildOpticalFlowPyramid(img, pyramid, winSize, maxLevel, withDerivatives, pyrBorder, derivBorder, tryReuseInputImage);
|
||||||
|
}
|
||||||
|
|
||||||
|
SANITY_CHECK(pyramid);
|
||||||
|
}
|
@ -41,17 +41,22 @@
|
|||||||
#include "precomp.hpp"
|
#include "precomp.hpp"
|
||||||
#include <float.h>
|
#include <float.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include "lkpyramid.hpp"
|
||||||
|
|
||||||
namespace cv
|
namespace
|
||||||
{
|
{
|
||||||
|
static void calcSharrDeriv(const cv::Mat& src, cv::Mat& dst)
|
||||||
typedef short deriv_type;
|
|
||||||
|
|
||||||
static void calcSharrDeriv(const Mat& src, Mat& dst)
|
|
||||||
{
|
{
|
||||||
|
using namespace cv;
|
||||||
|
using cv::detail::deriv_type;
|
||||||
int rows = src.rows, cols = src.cols, cn = src.channels(), colsn = cols*cn, depth = src.depth();
|
int rows = src.rows, cols = src.cols, cn = src.channels(), colsn = cols*cn, depth = src.depth();
|
||||||
CV_Assert(depth == CV_8U);
|
CV_Assert(depth == CV_8U);
|
||||||
dst.create(rows, cols, CV_MAKETYPE(DataType<deriv_type>::depth, cn*2));
|
dst.create(rows, cols, CV_MAKETYPE(DataType<deriv_type>::depth, cn*2));
|
||||||
|
|
||||||
|
#ifdef HAVE_TEGRA_OPTIMIZATION
|
||||||
|
if (tegra::calcSharrDeriv(src, dst))
|
||||||
|
return;
|
||||||
|
#endif
|
||||||
|
|
||||||
int x, y, delta = (int)alignSize((cols + 2)*cn, 16);
|
int x, y, delta = (int)alignSize((cols + 2)*cn, 16);
|
||||||
AutoBuffer<deriv_type> _tempBuf(delta*2 + 64);
|
AutoBuffer<deriv_type> _tempBuf(delta*2 + 64);
|
||||||
@ -127,373 +132,355 @@ static void calcSharrDeriv(const Mat& src, Mat& dst)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
}//namespace
|
||||||
|
|
||||||
struct LKTrackerInvoker
|
cv::detail::LKTrackerInvoker::LKTrackerInvoker(
|
||||||
{
|
const Mat& _prevImg, const Mat& _prevDeriv, const Mat& _nextImg,
|
||||||
LKTrackerInvoker( const Mat& _prevImg, const Mat& _prevDeriv, const Mat& _nextImg,
|
|
||||||
const Point2f* _prevPts, Point2f* _nextPts,
|
const Point2f* _prevPts, Point2f* _nextPts,
|
||||||
uchar* _status, float* _err,
|
uchar* _status, float* _err,
|
||||||
Size _winSize, TermCriteria _criteria,
|
Size _winSize, TermCriteria _criteria,
|
||||||
int _level, int _maxLevel, int _flags, float _minEigThreshold )
|
int _level, int _maxLevel, int _flags, float _minEigThreshold )
|
||||||
{
|
{
|
||||||
prevImg = &_prevImg;
|
prevImg = &_prevImg;
|
||||||
prevDeriv = &_prevDeriv;
|
prevDeriv = &_prevDeriv;
|
||||||
nextImg = &_nextImg;
|
nextImg = &_nextImg;
|
||||||
prevPts = _prevPts;
|
prevPts = _prevPts;
|
||||||
nextPts = _nextPts;
|
nextPts = _nextPts;
|
||||||
status = _status;
|
status = _status;
|
||||||
err = _err;
|
err = _err;
|
||||||
winSize = _winSize;
|
winSize = _winSize;
|
||||||
criteria = _criteria;
|
criteria = _criteria;
|
||||||
level = _level;
|
level = _level;
|
||||||
maxLevel = _maxLevel;
|
maxLevel = _maxLevel;
|
||||||
flags = _flags;
|
flags = _flags;
|
||||||
minEigThreshold = _minEigThreshold;
|
minEigThreshold = _minEigThreshold;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void cv::detail::LKTrackerInvoker::operator()(const BlockedRange& range) const
|
||||||
|
{
|
||||||
|
Point2f halfWin((winSize.width-1)*0.5f, (winSize.height-1)*0.5f);
|
||||||
|
const Mat& I = *prevImg;
|
||||||
|
const Mat& J = *nextImg;
|
||||||
|
const Mat& derivI = *prevDeriv;
|
||||||
|
|
||||||
void operator()(const BlockedRange& range) const
|
int j, cn = I.channels(), cn2 = cn*2;
|
||||||
|
cv::AutoBuffer<deriv_type> _buf(winSize.area()*(cn + cn2));
|
||||||
|
int derivDepth = DataType<deriv_type>::depth;
|
||||||
|
|
||||||
|
Mat IWinBuf(winSize, CV_MAKETYPE(derivDepth, cn), (deriv_type*)_buf);
|
||||||
|
Mat derivIWinBuf(winSize, CV_MAKETYPE(derivDepth, cn2), (deriv_type*)_buf + winSize.area()*cn);
|
||||||
|
|
||||||
|
for( int ptidx = range.begin(); ptidx < range.end(); ptidx++ )
|
||||||
{
|
{
|
||||||
Point2f halfWin((winSize.width-1)*0.5f, (winSize.height-1)*0.5f);
|
Point2f prevPt = prevPts[ptidx]*(float)(1./(1 << level));
|
||||||
const Mat& I = *prevImg;
|
Point2f nextPt;
|
||||||
const Mat& J = *nextImg;
|
if( level == maxLevel )
|
||||||
const Mat& derivI = *prevDeriv;
|
|
||||||
|
|
||||||
int j, cn = I.channels(), cn2 = cn*2;
|
|
||||||
cv::AutoBuffer<deriv_type> _buf(winSize.area()*(cn + cn2));
|
|
||||||
int derivDepth = DataType<deriv_type>::depth;
|
|
||||||
|
|
||||||
Mat IWinBuf(winSize, CV_MAKETYPE(derivDepth, cn), (deriv_type*)_buf);
|
|
||||||
Mat derivIWinBuf(winSize, CV_MAKETYPE(derivDepth, cn2), (deriv_type*)_buf + winSize.area()*cn);
|
|
||||||
|
|
||||||
for( int ptidx = range.begin(); ptidx < range.end(); ptidx++ )
|
|
||||||
{
|
{
|
||||||
Point2f prevPt = prevPts[ptidx]*(float)(1./(1 << level));
|
if( flags & OPTFLOW_USE_INITIAL_FLOW )
|
||||||
Point2f nextPt;
|
nextPt = nextPts[ptidx]*(float)(1./(1 << level));
|
||||||
if( level == maxLevel )
|
|
||||||
{
|
|
||||||
if( flags & OPTFLOW_USE_INITIAL_FLOW )
|
|
||||||
nextPt = nextPts[ptidx]*(float)(1./(1 << level));
|
|
||||||
else
|
|
||||||
nextPt = prevPt;
|
|
||||||
}
|
|
||||||
else
|
else
|
||||||
nextPt = nextPts[ptidx]*2.f;
|
nextPt = prevPt;
|
||||||
nextPts[ptidx] = nextPt;
|
}
|
||||||
|
else
|
||||||
Point2i iprevPt, inextPt;
|
nextPt = nextPts[ptidx]*2.f;
|
||||||
prevPt -= halfWin;
|
nextPts[ptidx] = nextPt;
|
||||||
iprevPt.x = cvFloor(prevPt.x);
|
|
||||||
iprevPt.y = cvFloor(prevPt.y);
|
Point2i iprevPt, inextPt;
|
||||||
|
prevPt -= halfWin;
|
||||||
if( iprevPt.x < -winSize.width || iprevPt.x >= derivI.cols ||
|
iprevPt.x = cvFloor(prevPt.x);
|
||||||
iprevPt.y < -winSize.height || iprevPt.y >= derivI.rows )
|
iprevPt.y = cvFloor(prevPt.y);
|
||||||
|
|
||||||
|
if( iprevPt.x < -winSize.width || iprevPt.x >= derivI.cols ||
|
||||||
|
iprevPt.y < -winSize.height || iprevPt.y >= derivI.rows )
|
||||||
|
{
|
||||||
|
if( level == 0 )
|
||||||
{
|
{
|
||||||
if( level == 0 )
|
if( status )
|
||||||
{
|
status[ptidx] = false;
|
||||||
if( status )
|
if( err )
|
||||||
status[ptidx] = false;
|
err[ptidx] = 0;
|
||||||
if( err )
|
|
||||||
err[ptidx] = 0;
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
float a = prevPt.x - iprevPt.x;
|
||||||
|
float b = prevPt.y - iprevPt.y;
|
||||||
|
const int W_BITS = 14, W_BITS1 = 14;
|
||||||
|
const float FLT_SCALE = 1.f/(1 << 20);
|
||||||
|
int iw00 = cvRound((1.f - a)*(1.f - b)*(1 << W_BITS));
|
||||||
|
int iw01 = cvRound(a*(1.f - b)*(1 << W_BITS));
|
||||||
|
int iw10 = cvRound((1.f - a)*b*(1 << W_BITS));
|
||||||
|
int iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
|
||||||
|
|
||||||
|
int dstep = (int)(derivI.step/derivI.elemSize1());
|
||||||
|
int step = (int)(I.step/I.elemSize1());
|
||||||
|
CV_Assert( step == (int)(J.step/J.elemSize1()) );
|
||||||
|
float A11 = 0, A12 = 0, A22 = 0;
|
||||||
|
|
||||||
|
#if CV_SSE2
|
||||||
|
__m128i qw0 = _mm_set1_epi32(iw00 + (iw01 << 16));
|
||||||
|
__m128i qw1 = _mm_set1_epi32(iw10 + (iw11 << 16));
|
||||||
|
__m128i z = _mm_setzero_si128();
|
||||||
|
__m128i qdelta_d = _mm_set1_epi32(1 << (W_BITS1-1));
|
||||||
|
__m128i qdelta = _mm_set1_epi32(1 << (W_BITS1-5-1));
|
||||||
|
__m128 qA11 = _mm_setzero_ps(), qA12 = _mm_setzero_ps(), qA22 = _mm_setzero_ps();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// extract the patch from the first image, compute covariation matrix of derivatives
|
||||||
|
int x, y;
|
||||||
|
for( y = 0; y < winSize.height; y++ )
|
||||||
|
{
|
||||||
|
const uchar* src = (const uchar*)I.data + (y + iprevPt.y)*step + iprevPt.x*cn;
|
||||||
|
const deriv_type* dsrc = (const deriv_type*)derivI.data + (y + iprevPt.y)*dstep + iprevPt.x*cn2;
|
||||||
|
|
||||||
float a = prevPt.x - iprevPt.x;
|
deriv_type* Iptr = (deriv_type*)(IWinBuf.data + y*IWinBuf.step);
|
||||||
float b = prevPt.y - iprevPt.y;
|
deriv_type* dIptr = (deriv_type*)(derivIWinBuf.data + y*derivIWinBuf.step);
|
||||||
const int W_BITS = 14, W_BITS1 = 14;
|
|
||||||
const float FLT_SCALE = 1.f/(1 << 20);
|
|
||||||
int iw00 = cvRound((1.f - a)*(1.f - b)*(1 << W_BITS));
|
|
||||||
int iw01 = cvRound(a*(1.f - b)*(1 << W_BITS));
|
|
||||||
int iw10 = cvRound((1.f - a)*b*(1 << W_BITS));
|
|
||||||
int iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
|
|
||||||
|
|
||||||
int dstep = (int)(derivI.step/derivI.elemSize1());
|
x = 0;
|
||||||
int step = (int)(I.step/I.elemSize1());
|
|
||||||
CV_Assert( step == (int)(J.step/J.elemSize1()) );
|
|
||||||
float A11 = 0, A12 = 0, A22 = 0;
|
|
||||||
|
|
||||||
#if CV_SSE2
|
#if CV_SSE2
|
||||||
__m128i qw0 = _mm_set1_epi32(iw00 + (iw01 << 16));
|
for( ; x <= winSize.width*cn - 4; x += 4, dsrc += 4*2, dIptr += 4*2 )
|
||||||
__m128i qw1 = _mm_set1_epi32(iw10 + (iw11 << 16));
|
{
|
||||||
__m128i z = _mm_setzero_si128();
|
__m128i v00, v01, v10, v11, t0, t1;
|
||||||
__m128i qdelta_d = _mm_set1_epi32(1 << (W_BITS1-1));
|
|
||||||
__m128i qdelta = _mm_set1_epi32(1 << (W_BITS1-5-1));
|
v00 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int*)(src + x)), z);
|
||||||
__m128 qA11 = _mm_setzero_ps(), qA12 = _mm_setzero_ps(), qA22 = _mm_setzero_ps();
|
v01 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int*)(src + x + cn)), z);
|
||||||
|
v10 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int*)(src + x + step)), z);
|
||||||
|
v11 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int*)(src + x + step + cn)), z);
|
||||||
|
|
||||||
|
t0 = _mm_add_epi32(_mm_madd_epi16(_mm_unpacklo_epi16(v00, v01), qw0),
|
||||||
|
_mm_madd_epi16(_mm_unpacklo_epi16(v10, v11), qw1));
|
||||||
|
t0 = _mm_srai_epi32(_mm_add_epi32(t0, qdelta), W_BITS1-5);
|
||||||
|
_mm_storel_epi64((__m128i*)(Iptr + x), _mm_packs_epi32(t0,t0));
|
||||||
|
|
||||||
|
v00 = _mm_loadu_si128((const __m128i*)(dsrc));
|
||||||
|
v01 = _mm_loadu_si128((const __m128i*)(dsrc + cn2));
|
||||||
|
v10 = _mm_loadu_si128((const __m128i*)(dsrc + dstep));
|
||||||
|
v11 = _mm_loadu_si128((const __m128i*)(dsrc + dstep + cn2));
|
||||||
|
|
||||||
|
t0 = _mm_add_epi32(_mm_madd_epi16(_mm_unpacklo_epi16(v00, v01), qw0),
|
||||||
|
_mm_madd_epi16(_mm_unpacklo_epi16(v10, v11), qw1));
|
||||||
|
t1 = _mm_add_epi32(_mm_madd_epi16(_mm_unpackhi_epi16(v00, v01), qw0),
|
||||||
|
_mm_madd_epi16(_mm_unpackhi_epi16(v10, v11), qw1));
|
||||||
|
t0 = _mm_srai_epi32(_mm_add_epi32(t0, qdelta_d), W_BITS1);
|
||||||
|
t1 = _mm_srai_epi32(_mm_add_epi32(t1, qdelta_d), W_BITS1);
|
||||||
|
v00 = _mm_packs_epi32(t0, t1); // Ix0 Iy0 Ix1 Iy1 ...
|
||||||
|
|
||||||
|
_mm_storeu_si128((__m128i*)dIptr, v00);
|
||||||
|
t0 = _mm_srai_epi32(v00, 16); // Iy0 Iy1 Iy2 Iy3
|
||||||
|
t1 = _mm_srai_epi32(_mm_slli_epi32(v00, 16), 16); // Ix0 Ix1 Ix2 Ix3
|
||||||
|
|
||||||
|
__m128 fy = _mm_cvtepi32_ps(t0);
|
||||||
|
__m128 fx = _mm_cvtepi32_ps(t1);
|
||||||
|
|
||||||
|
qA22 = _mm_add_ps(qA22, _mm_mul_ps(fy, fy));
|
||||||
|
qA12 = _mm_add_ps(qA12, _mm_mul_ps(fx, fy));
|
||||||
|
qA11 = _mm_add_ps(qA11, _mm_mul_ps(fx, fx));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
for( ; x < winSize.width*cn; x++, dsrc += 2, dIptr += 2 )
|
||||||
|
{
|
||||||
|
int ival = CV_DESCALE(src[x]*iw00 + src[x+cn]*iw01 +
|
||||||
|
src[x+step]*iw10 + src[x+step+cn]*iw11, W_BITS1-5);
|
||||||
|
int ixval = CV_DESCALE(dsrc[0]*iw00 + dsrc[cn2]*iw01 +
|
||||||
|
dsrc[dstep]*iw10 + dsrc[dstep+cn2]*iw11, W_BITS1);
|
||||||
|
int iyval = CV_DESCALE(dsrc[1]*iw00 + dsrc[cn2+1]*iw01 + dsrc[dstep+1]*iw10 +
|
||||||
|
dsrc[dstep+cn2+1]*iw11, W_BITS1);
|
||||||
|
|
||||||
|
Iptr[x] = (short)ival;
|
||||||
|
dIptr[0] = (short)ixval;
|
||||||
|
dIptr[1] = (short)iyval;
|
||||||
|
|
||||||
|
A11 += (float)(ixval*ixval);
|
||||||
|
A12 += (float)(ixval*iyval);
|
||||||
|
A22 += (float)(iyval*iyval);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#if CV_SSE2
|
||||||
|
float CV_DECL_ALIGNED(16) A11buf[4], A12buf[4], A22buf[4];
|
||||||
|
_mm_store_ps(A11buf, qA11);
|
||||||
|
_mm_store_ps(A12buf, qA12);
|
||||||
|
_mm_store_ps(A22buf, qA22);
|
||||||
|
A11 += A11buf[0] + A11buf[1] + A11buf[2] + A11buf[3];
|
||||||
|
A12 += A12buf[0] + A12buf[1] + A12buf[2] + A12buf[3];
|
||||||
|
A22 += A22buf[0] + A22buf[1] + A22buf[2] + A22buf[3];
|
||||||
|
#endif
|
||||||
|
|
||||||
|
A11 *= FLT_SCALE;
|
||||||
|
A12 *= FLT_SCALE;
|
||||||
|
A22 *= FLT_SCALE;
|
||||||
|
|
||||||
|
float D = A11*A22 - A12*A12;
|
||||||
|
float minEig = (A22 + A11 - std::sqrt((A11-A22)*(A11-A22) +
|
||||||
|
4.f*A12*A12))/(2*winSize.width*winSize.height);
|
||||||
|
|
||||||
|
if( err && (flags & CV_LKFLOW_GET_MIN_EIGENVALS) != 0 )
|
||||||
|
err[ptidx] = (float)minEig;
|
||||||
|
|
||||||
|
if( minEig < minEigThreshold || D < FLT_EPSILON )
|
||||||
|
{
|
||||||
|
if( level == 0 && status )
|
||||||
|
status[ptidx] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
D = 1.f/D;
|
||||||
|
|
||||||
|
nextPt -= halfWin;
|
||||||
|
Point2f prevDelta;
|
||||||
|
|
||||||
|
for( j = 0; j < criteria.maxCount; j++ )
|
||||||
|
{
|
||||||
|
inextPt.x = cvFloor(nextPt.x);
|
||||||
|
inextPt.y = cvFloor(nextPt.y);
|
||||||
|
|
||||||
|
if( inextPt.x < -winSize.width || inextPt.x >= J.cols ||
|
||||||
|
inextPt.y < -winSize.height || inextPt.y >= J.rows )
|
||||||
|
{
|
||||||
|
if( level == 0 && status )
|
||||||
|
status[ptidx] = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
a = nextPt.x - inextPt.x;
|
||||||
|
b = nextPt.y - inextPt.y;
|
||||||
|
iw00 = cvRound((1.f - a)*(1.f - b)*(1 << W_BITS));
|
||||||
|
iw01 = cvRound(a*(1.f - b)*(1 << W_BITS));
|
||||||
|
iw10 = cvRound((1.f - a)*b*(1 << W_BITS));
|
||||||
|
iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
|
||||||
|
float b1 = 0, b2 = 0;
|
||||||
|
#if CV_SSE2
|
||||||
|
qw0 = _mm_set1_epi32(iw00 + (iw01 << 16));
|
||||||
|
qw1 = _mm_set1_epi32(iw10 + (iw11 << 16));
|
||||||
|
__m128 qb0 = _mm_setzero_ps(), qb1 = _mm_setzero_ps();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// extract the patch from the first image, compute covariation matrix of derivatives
|
|
||||||
int x, y;
|
|
||||||
for( y = 0; y < winSize.height; y++ )
|
for( y = 0; y < winSize.height; y++ )
|
||||||
{
|
{
|
||||||
const uchar* src = (const uchar*)I.data + (y + iprevPt.y)*step + iprevPt.x*cn;
|
const uchar* Jptr = (const uchar*)J.data + (y + inextPt.y)*step + inextPt.x*cn;
|
||||||
const deriv_type* dsrc = (const deriv_type*)derivI.data + (y + iprevPt.y)*dstep + iprevPt.x*cn2;
|
const deriv_type* Iptr = (const deriv_type*)(IWinBuf.data + y*IWinBuf.step);
|
||||||
|
const deriv_type* dIptr = (const deriv_type*)(derivIWinBuf.data + y*derivIWinBuf.step);
|
||||||
deriv_type* Iptr = (deriv_type*)(IWinBuf.data + y*IWinBuf.step);
|
|
||||||
deriv_type* dIptr = (deriv_type*)(derivIWinBuf.data + y*derivIWinBuf.step);
|
|
||||||
|
|
||||||
x = 0;
|
x = 0;
|
||||||
|
|
||||||
#if CV_SSE2
|
#if CV_SSE2
|
||||||
for( ; x <= winSize.width*cn - 4; x += 4, dsrc += 4*2, dIptr += 4*2 )
|
for( ; x <= winSize.width*cn - 8; x += 8, dIptr += 8*2 )
|
||||||
{
|
{
|
||||||
__m128i v00, v01, v10, v11, t0, t1;
|
__m128i diff0 = _mm_loadu_si128((const __m128i*)(Iptr + x)), diff1;
|
||||||
|
__m128i v00 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(Jptr + x)), z);
|
||||||
|
__m128i v01 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(Jptr + x + cn)), z);
|
||||||
|
__m128i v10 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(Jptr + x + step)), z);
|
||||||
|
__m128i v11 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(Jptr + x + step + cn)), z);
|
||||||
|
|
||||||
v00 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int*)(src + x)), z);
|
__m128i t0 = _mm_add_epi32(_mm_madd_epi16(_mm_unpacklo_epi16(v00, v01), qw0),
|
||||||
v01 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int*)(src + x + cn)), z);
|
_mm_madd_epi16(_mm_unpacklo_epi16(v10, v11), qw1));
|
||||||
v10 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int*)(src + x + step)), z);
|
__m128i t1 = _mm_add_epi32(_mm_madd_epi16(_mm_unpackhi_epi16(v00, v01), qw0),
|
||||||
v11 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int*)(src + x + step + cn)), z);
|
_mm_madd_epi16(_mm_unpackhi_epi16(v10, v11), qw1));
|
||||||
|
|
||||||
t0 = _mm_add_epi32(_mm_madd_epi16(_mm_unpacklo_epi16(v00, v01), qw0),
|
|
||||||
_mm_madd_epi16(_mm_unpacklo_epi16(v10, v11), qw1));
|
|
||||||
t0 = _mm_srai_epi32(_mm_add_epi32(t0, qdelta), W_BITS1-5);
|
t0 = _mm_srai_epi32(_mm_add_epi32(t0, qdelta), W_BITS1-5);
|
||||||
_mm_storel_epi64((__m128i*)(Iptr + x), _mm_packs_epi32(t0,t0));
|
t1 = _mm_srai_epi32(_mm_add_epi32(t1, qdelta), W_BITS1-5);
|
||||||
|
diff0 = _mm_subs_epi16(_mm_packs_epi32(t0, t1), diff0);
|
||||||
v00 = _mm_loadu_si128((const __m128i*)(dsrc));
|
diff1 = _mm_unpackhi_epi16(diff0, diff0);
|
||||||
v01 = _mm_loadu_si128((const __m128i*)(dsrc + cn2));
|
diff0 = _mm_unpacklo_epi16(diff0, diff0); // It0 It0 It1 It1 ...
|
||||||
v10 = _mm_loadu_si128((const __m128i*)(dsrc + dstep));
|
v00 = _mm_loadu_si128((const __m128i*)(dIptr)); // Ix0 Iy0 Ix1 Iy1 ...
|
||||||
v11 = _mm_loadu_si128((const __m128i*)(dsrc + dstep + cn2));
|
v01 = _mm_loadu_si128((const __m128i*)(dIptr + 8));
|
||||||
|
v10 = _mm_mullo_epi16(v00, diff0);
|
||||||
t0 = _mm_add_epi32(_mm_madd_epi16(_mm_unpacklo_epi16(v00, v01), qw0),
|
v11 = _mm_mulhi_epi16(v00, diff0);
|
||||||
_mm_madd_epi16(_mm_unpacklo_epi16(v10, v11), qw1));
|
v00 = _mm_unpacklo_epi16(v10, v11);
|
||||||
t1 = _mm_add_epi32(_mm_madd_epi16(_mm_unpackhi_epi16(v00, v01), qw0),
|
v10 = _mm_unpackhi_epi16(v10, v11);
|
||||||
_mm_madd_epi16(_mm_unpackhi_epi16(v10, v11), qw1));
|
qb0 = _mm_add_ps(qb0, _mm_cvtepi32_ps(v00));
|
||||||
t0 = _mm_srai_epi32(_mm_add_epi32(t0, qdelta_d), W_BITS1);
|
qb1 = _mm_add_ps(qb1, _mm_cvtepi32_ps(v10));
|
||||||
t1 = _mm_srai_epi32(_mm_add_epi32(t1, qdelta_d), W_BITS1);
|
v10 = _mm_mullo_epi16(v01, diff1);
|
||||||
v00 = _mm_packs_epi32(t0, t1); // Ix0 Iy0 Ix1 Iy1 ...
|
v11 = _mm_mulhi_epi16(v01, diff1);
|
||||||
|
v00 = _mm_unpacklo_epi16(v10, v11);
|
||||||
_mm_storeu_si128((__m128i*)dIptr, v00);
|
v10 = _mm_unpackhi_epi16(v10, v11);
|
||||||
t0 = _mm_srai_epi32(v00, 16); // Iy0 Iy1 Iy2 Iy3
|
qb0 = _mm_add_ps(qb0, _mm_cvtepi32_ps(v00));
|
||||||
t1 = _mm_srai_epi32(_mm_slli_epi32(v00, 16), 16); // Ix0 Ix1 Ix2 Ix3
|
qb1 = _mm_add_ps(qb1, _mm_cvtepi32_ps(v10));
|
||||||
|
|
||||||
__m128 fy = _mm_cvtepi32_ps(t0);
|
|
||||||
__m128 fx = _mm_cvtepi32_ps(t1);
|
|
||||||
|
|
||||||
qA22 = _mm_add_ps(qA22, _mm_mul_ps(fy, fy));
|
|
||||||
qA12 = _mm_add_ps(qA12, _mm_mul_ps(fx, fy));
|
|
||||||
qA11 = _mm_add_ps(qA11, _mm_mul_ps(fx, fx));
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
for( ; x < winSize.width*cn; x++, dsrc += 2, dIptr += 2 )
|
for( ; x < winSize.width*cn; x++, dIptr += 2 )
|
||||||
{
|
{
|
||||||
int ival = CV_DESCALE(src[x]*iw00 + src[x+cn]*iw01 +
|
int diff = CV_DESCALE(Jptr[x]*iw00 + Jptr[x+cn]*iw01 +
|
||||||
src[x+step]*iw10 + src[x+step+cn]*iw11, W_BITS1-5);
|
Jptr[x+step]*iw10 + Jptr[x+step+cn]*iw11,
|
||||||
int ixval = CV_DESCALE(dsrc[0]*iw00 + dsrc[cn2]*iw01 +
|
W_BITS1-5) - Iptr[x];
|
||||||
dsrc[dstep]*iw10 + dsrc[dstep+cn2]*iw11, W_BITS1);
|
b1 += (float)(diff*dIptr[0]);
|
||||||
int iyval = CV_DESCALE(dsrc[1]*iw00 + dsrc[cn2+1]*iw01 + dsrc[dstep+1]*iw10 +
|
b2 += (float)(diff*dIptr[1]);
|
||||||
dsrc[dstep+cn2+1]*iw11, W_BITS1);
|
|
||||||
|
|
||||||
Iptr[x] = (short)ival;
|
|
||||||
dIptr[0] = (short)ixval;
|
|
||||||
dIptr[1] = (short)iyval;
|
|
||||||
|
|
||||||
A11 += (float)(ixval*ixval);
|
|
||||||
A12 += (float)(ixval*iyval);
|
|
||||||
A22 += (float)(iyval*iyval);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if CV_SSE2
|
#if CV_SSE2
|
||||||
float CV_DECL_ALIGNED(16) A11buf[4], A12buf[4], A22buf[4];
|
float CV_DECL_ALIGNED(16) bbuf[4];
|
||||||
_mm_store_ps(A11buf, qA11);
|
_mm_store_ps(bbuf, _mm_add_ps(qb0, qb1));
|
||||||
_mm_store_ps(A12buf, qA12);
|
b1 += bbuf[0] + bbuf[2];
|
||||||
_mm_store_ps(A22buf, qA22);
|
b2 += bbuf[1] + bbuf[3];
|
||||||
A11 += A11buf[0] + A11buf[1] + A11buf[2] + A11buf[3];
|
|
||||||
A12 += A12buf[0] + A12buf[1] + A12buf[2] + A12buf[3];
|
|
||||||
A22 += A22buf[0] + A22buf[1] + A22buf[2] + A22buf[3];
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
b1 *= FLT_SCALE;
|
||||||
|
b2 *= FLT_SCALE;
|
||||||
|
|
||||||
A11 *= FLT_SCALE;
|
Point2f delta( (float)((A12*b2 - A22*b1) * D),
|
||||||
A12 *= FLT_SCALE;
|
(float)((A12*b1 - A11*b2) * D));
|
||||||
A22 *= FLT_SCALE;
|
//delta = -delta;
|
||||||
|
|
||||||
float D = A11*A22 - A12*A12;
|
nextPt += delta;
|
||||||
float minEig = (A22 + A11 - std::sqrt((A11-A22)*(A11-A22) +
|
nextPts[ptidx] = nextPt + halfWin;
|
||||||
4.f*A12*A12))/(2*winSize.width*winSize.height);
|
|
||||||
|
|
||||||
if( err && (flags & CV_LKFLOW_GET_MIN_EIGENVALS) != 0 )
|
if( delta.ddot(delta) <= criteria.epsilon )
|
||||||
err[ptidx] = (float)minEig;
|
break;
|
||||||
|
|
||||||
if( minEig < minEigThreshold || D < FLT_EPSILON )
|
if( j > 0 && std::abs(delta.x + prevDelta.x) < 0.01 &&
|
||||||
|
std::abs(delta.y + prevDelta.y) < 0.01 )
|
||||||
{
|
{
|
||||||
if( level == 0 && status )
|
nextPts[ptidx] -= delta*0.5f;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
prevDelta = delta;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( status[ptidx] && err && level == 0 && (flags & CV_LKFLOW_GET_MIN_EIGENVALS) == 0 )
|
||||||
|
{
|
||||||
|
Point2f nextPt = nextPts[ptidx] - halfWin;
|
||||||
|
Point inextPt;
|
||||||
|
|
||||||
|
inextPt.x = cvFloor(nextPt.x);
|
||||||
|
inextPt.y = cvFloor(nextPt.y);
|
||||||
|
|
||||||
|
if( inextPt.x < -winSize.width || inextPt.x >= J.cols ||
|
||||||
|
inextPt.y < -winSize.height || inextPt.y >= J.rows )
|
||||||
|
{
|
||||||
|
if( status )
|
||||||
status[ptidx] = false;
|
status[ptidx] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
D = 1.f/D;
|
float a = nextPt.x - inextPt.x;
|
||||||
|
float b = nextPt.y - inextPt.y;
|
||||||
|
iw00 = cvRound((1.f - a)*(1.f - b)*(1 << W_BITS));
|
||||||
|
iw01 = cvRound(a*(1.f - b)*(1 << W_BITS));
|
||||||
|
iw10 = cvRound((1.f - a)*b*(1 << W_BITS));
|
||||||
|
iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
|
||||||
|
float errval = 0.f;
|
||||||
|
|
||||||
nextPt -= halfWin;
|
for( y = 0; y < winSize.height; y++ )
|
||||||
Point2f prevDelta;
|
|
||||||
|
|
||||||
for( j = 0; j < criteria.maxCount; j++ )
|
|
||||||
{
|
{
|
||||||
inextPt.x = cvFloor(nextPt.x);
|
const uchar* Jptr = (const uchar*)J.data + (y + inextPt.y)*step + inextPt.x*cn;
|
||||||
inextPt.y = cvFloor(nextPt.y);
|
const deriv_type* Iptr = (const deriv_type*)(IWinBuf.data + y*IWinBuf.step);
|
||||||
|
|
||||||
if( inextPt.x < -winSize.width || inextPt.x >= J.cols ||
|
for( x = 0; x < winSize.width*cn; x++ )
|
||||||
inextPt.y < -winSize.height || inextPt.y >= J.rows )
|
|
||||||
{
|
{
|
||||||
if( level == 0 && status )
|
int diff = CV_DESCALE(Jptr[x]*iw00 + Jptr[x+cn]*iw01 +
|
||||||
status[ptidx] = false;
|
Jptr[x+step]*iw10 + Jptr[x+step+cn]*iw11,
|
||||||
break;
|
W_BITS1-5) - Iptr[x];
|
||||||
|
errval += std::abs((float)diff);
|
||||||
}
|
}
|
||||||
|
|
||||||
a = nextPt.x - inextPt.x;
|
|
||||||
b = nextPt.y - inextPt.y;
|
|
||||||
iw00 = cvRound((1.f - a)*(1.f - b)*(1 << W_BITS));
|
|
||||||
iw01 = cvRound(a*(1.f - b)*(1 << W_BITS));
|
|
||||||
iw10 = cvRound((1.f - a)*b*(1 << W_BITS));
|
|
||||||
iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
|
|
||||||
float b1 = 0, b2 = 0;
|
|
||||||
#if CV_SSE2
|
|
||||||
qw0 = _mm_set1_epi32(iw00 + (iw01 << 16));
|
|
||||||
qw1 = _mm_set1_epi32(iw10 + (iw11 << 16));
|
|
||||||
__m128 qb0 = _mm_setzero_ps(), qb1 = _mm_setzero_ps();
|
|
||||||
#endif
|
|
||||||
|
|
||||||
for( y = 0; y < winSize.height; y++ )
|
|
||||||
{
|
|
||||||
const uchar* Jptr = (const uchar*)J.data + (y + inextPt.y)*step + inextPt.x*cn;
|
|
||||||
const deriv_type* Iptr = (const deriv_type*)(IWinBuf.data + y*IWinBuf.step);
|
|
||||||
const deriv_type* dIptr = (const deriv_type*)(derivIWinBuf.data + y*derivIWinBuf.step);
|
|
||||||
|
|
||||||
x = 0;
|
|
||||||
|
|
||||||
#if CV_SSE2
|
|
||||||
for( ; x <= winSize.width*cn - 8; x += 8, dIptr += 8*2 )
|
|
||||||
{
|
|
||||||
__m128i diff0 = _mm_loadu_si128((const __m128i*)(Iptr + x)), diff1;
|
|
||||||
__m128i v00 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(Jptr + x)), z);
|
|
||||||
__m128i v01 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(Jptr + x + cn)), z);
|
|
||||||
__m128i v10 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(Jptr + x + step)), z);
|
|
||||||
__m128i v11 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(Jptr + x + step + cn)), z);
|
|
||||||
|
|
||||||
__m128i t0 = _mm_add_epi32(_mm_madd_epi16(_mm_unpacklo_epi16(v00, v01), qw0),
|
|
||||||
_mm_madd_epi16(_mm_unpacklo_epi16(v10, v11), qw1));
|
|
||||||
__m128i t1 = _mm_add_epi32(_mm_madd_epi16(_mm_unpackhi_epi16(v00, v01), qw0),
|
|
||||||
_mm_madd_epi16(_mm_unpackhi_epi16(v10, v11), qw1));
|
|
||||||
t0 = _mm_srai_epi32(_mm_add_epi32(t0, qdelta), W_BITS1-5);
|
|
||||||
t1 = _mm_srai_epi32(_mm_add_epi32(t1, qdelta), W_BITS1-5);
|
|
||||||
diff0 = _mm_subs_epi16(_mm_packs_epi32(t0, t1), diff0);
|
|
||||||
diff1 = _mm_unpackhi_epi16(diff0, diff0);
|
|
||||||
diff0 = _mm_unpacklo_epi16(diff0, diff0); // It0 It0 It1 It1 ...
|
|
||||||
v00 = _mm_loadu_si128((const __m128i*)(dIptr)); // Ix0 Iy0 Ix1 Iy1 ...
|
|
||||||
v01 = _mm_loadu_si128((const __m128i*)(dIptr + 8));
|
|
||||||
v10 = _mm_mullo_epi16(v00, diff0);
|
|
||||||
v11 = _mm_mulhi_epi16(v00, diff0);
|
|
||||||
v00 = _mm_unpacklo_epi16(v10, v11);
|
|
||||||
v10 = _mm_unpackhi_epi16(v10, v11);
|
|
||||||
qb0 = _mm_add_ps(qb0, _mm_cvtepi32_ps(v00));
|
|
||||||
qb1 = _mm_add_ps(qb1, _mm_cvtepi32_ps(v10));
|
|
||||||
v10 = _mm_mullo_epi16(v01, diff1);
|
|
||||||
v11 = _mm_mulhi_epi16(v01, diff1);
|
|
||||||
v00 = _mm_unpacklo_epi16(v10, v11);
|
|
||||||
v10 = _mm_unpackhi_epi16(v10, v11);
|
|
||||||
qb0 = _mm_add_ps(qb0, _mm_cvtepi32_ps(v00));
|
|
||||||
qb1 = _mm_add_ps(qb1, _mm_cvtepi32_ps(v10));
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
for( ; x < winSize.width*cn; x++, dIptr += 2 )
|
|
||||||
{
|
|
||||||
int diff = CV_DESCALE(Jptr[x]*iw00 + Jptr[x+cn]*iw01 +
|
|
||||||
Jptr[x+step]*iw10 + Jptr[x+step+cn]*iw11,
|
|
||||||
W_BITS1-5) - Iptr[x];
|
|
||||||
b1 += (float)(diff*dIptr[0]);
|
|
||||||
b2 += (float)(diff*dIptr[1]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#if CV_SSE2
|
|
||||||
float CV_DECL_ALIGNED(16) bbuf[4];
|
|
||||||
_mm_store_ps(bbuf, _mm_add_ps(qb0, qb1));
|
|
||||||
b1 += bbuf[0] + bbuf[2];
|
|
||||||
b2 += bbuf[1] + bbuf[3];
|
|
||||||
#endif
|
|
||||||
|
|
||||||
b1 *= FLT_SCALE;
|
|
||||||
b2 *= FLT_SCALE;
|
|
||||||
|
|
||||||
Point2f delta( (float)((A12*b2 - A22*b1) * D),
|
|
||||||
(float)((A12*b1 - A11*b2) * D));
|
|
||||||
//delta = -delta;
|
|
||||||
|
|
||||||
nextPt += delta;
|
|
||||||
nextPts[ptidx] = nextPt + halfWin;
|
|
||||||
|
|
||||||
if( delta.ddot(delta) <= criteria.epsilon )
|
|
||||||
break;
|
|
||||||
|
|
||||||
if( j > 0 && std::abs(delta.x + prevDelta.x) < 0.01 &&
|
|
||||||
std::abs(delta.y + prevDelta.y) < 0.01 )
|
|
||||||
{
|
|
||||||
nextPts[ptidx] -= delta*0.5f;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
prevDelta = delta;
|
|
||||||
}
|
|
||||||
|
|
||||||
if( status[ptidx] && err && level == 0 && (flags & CV_LKFLOW_GET_MIN_EIGENVALS) == 0 )
|
|
||||||
{
|
|
||||||
Point2f nextPt = nextPts[ptidx] - halfWin;
|
|
||||||
Point inextPt;
|
|
||||||
|
|
||||||
inextPt.x = cvFloor(nextPt.x);
|
|
||||||
inextPt.y = cvFloor(nextPt.y);
|
|
||||||
|
|
||||||
if( inextPt.x < -winSize.width || inextPt.x >= J.cols ||
|
|
||||||
inextPt.y < -winSize.height || inextPt.y >= J.rows )
|
|
||||||
{
|
|
||||||
if( status )
|
|
||||||
status[ptidx] = false;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
float a = nextPt.x - inextPt.x;
|
|
||||||
float b = nextPt.y - inextPt.y;
|
|
||||||
iw00 = cvRound((1.f - a)*(1.f - b)*(1 << W_BITS));
|
|
||||||
iw01 = cvRound(a*(1.f - b)*(1 << W_BITS));
|
|
||||||
iw10 = cvRound((1.f - a)*b*(1 << W_BITS));
|
|
||||||
iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
|
|
||||||
float errval = 0.f;
|
|
||||||
|
|
||||||
for( y = 0; y < winSize.height; y++ )
|
|
||||||
{
|
|
||||||
const uchar* Jptr = (const uchar*)J.data + (y + inextPt.y)*step + inextPt.x*cn;
|
|
||||||
const deriv_type* Iptr = (const deriv_type*)(IWinBuf.data + y*IWinBuf.step);
|
|
||||||
|
|
||||||
for( x = 0; x < winSize.width*cn; x++ )
|
|
||||||
{
|
|
||||||
int diff = CV_DESCALE(Jptr[x]*iw00 + Jptr[x+cn]*iw01 +
|
|
||||||
Jptr[x+step]*iw10 + Jptr[x+step+cn]*iw11,
|
|
||||||
W_BITS1-5) - Iptr[x];
|
|
||||||
errval += std::abs((float)diff);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
err[ptidx] = errval * 1.f/(32*winSize.width*cn*winSize.height);
|
|
||||||
}
|
}
|
||||||
|
err[ptidx] = errval * 1.f/(32*winSize.width*cn*winSize.height);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const Mat* prevImg;
|
|
||||||
const Mat* nextImg;
|
|
||||||
const Mat* prevDeriv;
|
|
||||||
const Point2f* prevPts;
|
|
||||||
Point2f* nextPts;
|
|
||||||
uchar* status;
|
|
||||||
float* err;
|
|
||||||
Size winSize;
|
|
||||||
TermCriteria criteria;
|
|
||||||
int level;
|
|
||||||
int maxLevel;
|
|
||||||
int flags;
|
|
||||||
float minEigThreshold;
|
|
||||||
};
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int cv::buildOpticalFlowPyramid(InputArray _img, OutputArrayOfArrays pyramid, Size winSize, int maxLevel, bool withDerivatives,
|
int cv::buildOpticalFlowPyramid(InputArray _img, OutputArrayOfArrays pyramid, Size winSize, int maxLevel, bool withDerivatives,
|
||||||
int pyrBorder, int derivBorder, bool tryReuseInputImage)
|
int pyrBorder, int derivBorder, bool tryReuseInputImage)
|
||||||
{
|
{
|
||||||
@ -503,7 +490,7 @@ int cv::buildOpticalFlowPyramid(InputArray _img, OutputArrayOfArrays pyramid, Si
|
|||||||
|
|
||||||
pyramid.create(1, (maxLevel + 1) * pyrstep, 0 /*type*/, -1, true, 0);
|
pyramid.create(1, (maxLevel + 1) * pyrstep, 0 /*type*/, -1, true, 0);
|
||||||
|
|
||||||
int derivType = CV_MAKETYPE(DataType<deriv_type>::depth, img.channels() * 2);
|
int derivType = CV_MAKETYPE(DataType<cv::detail::deriv_type>::depth, img.channels() * 2);
|
||||||
|
|
||||||
//level 0
|
//level 0
|
||||||
bool lvl0IsSet = false;
|
bool lvl0IsSet = false;
|
||||||
@ -602,7 +589,7 @@ void cv::calcOpticalFlowPyrLK( InputArray _prevImg, InputArray _nextImg,
|
|||||||
return;
|
return;
|
||||||
#endif
|
#endif
|
||||||
Mat prevPtsMat = _prevPts.getMat();
|
Mat prevPtsMat = _prevPts.getMat();
|
||||||
const int derivDepth = DataType<deriv_type>::depth;
|
const int derivDepth = DataType<cv::detail::deriv_type>::depth;
|
||||||
|
|
||||||
CV_Assert( maxLevel >= 0 && winSize.width > 2 && winSize.height > 2 );
|
CV_Assert( maxLevel >= 0 && winSize.width > 2 && winSize.height > 2 );
|
||||||
|
|
||||||
@ -744,6 +731,8 @@ void cv::calcOpticalFlowPyrLK( InputArray _prevImg, InputArray _nextImg,
|
|||||||
CV_Assert(prevPyr[level * lvlStep1].size() == nextPyr[level * lvlStep2].size());
|
CV_Assert(prevPyr[level * lvlStep1].size() == nextPyr[level * lvlStep2].size());
|
||||||
CV_Assert(prevPyr[level * lvlStep1].type() == nextPyr[level * lvlStep2].type());
|
CV_Assert(prevPyr[level * lvlStep1].type() == nextPyr[level * lvlStep2].type());
|
||||||
|
|
||||||
|
typedef cv::detail::LKTrackerInvoker LKTrackerInvoker;
|
||||||
|
|
||||||
parallel_for(BlockedRange(0, npoints), LKTrackerInvoker(prevPyr[level * lvlStep1], derivI,
|
parallel_for(BlockedRange(0, npoints), LKTrackerInvoker(prevPyr[level * lvlStep1], derivI,
|
||||||
nextPyr[level * lvlStep2], prevPts, nextPts,
|
nextPyr[level * lvlStep2], prevPts, nextPts,
|
||||||
status, err,
|
status, err,
|
||||||
|
36
modules/video/src/lkpyramid.hpp
Normal file
36
modules/video/src/lkpyramid.hpp
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
namespace cv
|
||||||
|
{
|
||||||
|
namespace detail
|
||||||
|
{
|
||||||
|
|
||||||
|
typedef short deriv_type;
|
||||||
|
|
||||||
|
struct LKTrackerInvoker
|
||||||
|
{
|
||||||
|
LKTrackerInvoker( const Mat& _prevImg, const Mat& _prevDeriv, const Mat& _nextImg,
|
||||||
|
const Point2f* _prevPts, Point2f* _nextPts,
|
||||||
|
uchar* _status, float* _err,
|
||||||
|
Size _winSize, TermCriteria _criteria,
|
||||||
|
int _level, int _maxLevel, int _flags, float _minEigThreshold );
|
||||||
|
|
||||||
|
void operator()(const BlockedRange& range) const;
|
||||||
|
|
||||||
|
const Mat* prevImg;
|
||||||
|
const Mat* nextImg;
|
||||||
|
const Mat* prevDeriv;
|
||||||
|
const Point2f* prevPts;
|
||||||
|
Point2f* nextPts;
|
||||||
|
uchar* status;
|
||||||
|
float* err;
|
||||||
|
Size winSize;
|
||||||
|
TermCriteria criteria;
|
||||||
|
int level;
|
||||||
|
int maxLevel;
|
||||||
|
int flags;
|
||||||
|
float minEigThreshold;
|
||||||
|
};
|
||||||
|
|
||||||
|
}// namespace detail
|
||||||
|
}// namespace cv
|
Loading…
Reference in New Issue
Block a user