Merge pull request from vbystricky:ocl_calcOpticalFlowPyrLK

This commit is contained in:
Andrey Pavlenko 2014-01-25 17:08:06 +04:00 committed by OpenCV Buildbot
commit a64d3c1744
6 changed files with 1283 additions and 2 deletions
modules
core
include/opencv2/core
src
video

@ -60,6 +60,7 @@ class CV_EXPORTS Program;
class CV_EXPORTS ProgramSource2;
class CV_EXPORTS Queue;
class CV_EXPORTS PlatformInfo2;
class CV_EXPORTS Image2D;
class CV_EXPORTS Device
{
@ -89,6 +90,7 @@ public:
String vendor() const;
String OpenCL_C_Version() const;
String OpenCLVersion() const;
String deviceVersion() const;
String driverVersion() const;
void* ptr() const;
@ -325,6 +327,7 @@ public:
const String& buildopts, String* errmsg=0);
int set(int i, const void* value, size_t sz);
int set(int i, const Image2D& image2D);
int set(int i, const UMat& m);
int set(int i, const KernelArg& arg);
template<typename _Tp> int set(int i, const _Tp& value)
@ -574,6 +577,19 @@ CV_EXPORTS const char* typeToStr(int t);
CV_EXPORTS const char* memopTypeToStr(int t);
CV_EXPORTS void getPlatfomsInfo(std::vector<PlatformInfo2>& platform_info);
class CV_EXPORTS Image2D
{
public:
Image2D();
Image2D(const UMat &src);
~Image2D();
void* ptr() const;
protected:
struct Impl;
Impl* p;
};
}}
#endif

@ -821,6 +821,7 @@ OCL_FUNC_P(cl_mem, clCreateSubBuffer,
const void * buffer_create_info,
cl_int * errcode_ret),
(buffer, flags, buffer_create_type, buffer_create_info, errcode_ret))
*/
OCL_FUNC_P(cl_mem, clCreateImage,
(cl_context context,
@ -831,6 +832,18 @@ OCL_FUNC_P(cl_mem, clCreateImage,
cl_int * errcode_ret),
(context, flags, image_format, image_desc, host_ptr, errcode_ret))
OCL_FUNC_P(cl_mem, clCreateImage2D,
(cl_context context,
cl_mem_flags flags,
const cl_image_format * image_format,
size_t image_width,
size_t image_height,
size_t image_row_pitch,
void * host_ptr,
cl_int *errcode_ret),
(context, flags, image_format, image_width, image_height, image_row_pitch, host_ptr, errcode_ret))
/*
OCL_FUNC(cl_int, clGetSupportedImageFormats,
(cl_context context,
cl_mem_flags flags,
@ -945,21 +958,26 @@ OCL_FUNC(cl_int, clEnqueueCopyImageToBuffer,
cl_event * event),
(command_queue, src_image, dst_buffer, src_origin, region, dst_offset,
num_events_in_wait_list, event_wait_list, event))
*/
OCL_FUNC(cl_int, clEnqueueCopyBufferToImage,
(cl_command_queue command_queue,
cl_mem src_buffer,
cl_mem dst_image,
size_t src_offset,
const size_t * dst_origin[3],
const size_t * region[3],
const size_t dst_origin[3],
const size_t region[3],
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event),
(command_queue, src_buffer, dst_image, src_offset, dst_origin,
region, num_events_in_wait_list, event_wait_list, event))
OCL_FUNC(cl_int, clFlush,
(cl_command_queue command_queue),
(command_queue))
/*
OCL_FUNC_P(void*, clEnqueueMapImage,
(cl_command_queue command_queue,
cl_mem image,
@ -976,7 +994,9 @@ OCL_FUNC_P(void*, clEnqueueMapImage,
(command_queue, image, blocking_map, map_flags, origin, region,
image_row_pitch, image_slice_pitch, num_events_in_wait_list,
event_wait_list, event, errcode_ret))
*/
/*
OCL_FUNC(cl_int, clRetainProgram, (cl_program program), (program))
OCL_FUNC(cl_int, clGetKernelInfo,
@ -1705,6 +1725,9 @@ String Device::OpenCL_C_Version() const
String Device::OpenCLVersion() const
{ return p ? p->getStrProp(CL_DEVICE_EXTENSIONS) : String(); }
String Device::deviceVersion() const
{ return p ? p->getStrProp(CL_DEVICE_VERSION) : String(); }
String Device::driverVersion() const
{ return p ? p->getStrProp(CL_DRIVER_VERSION) : String(); }
@ -2689,6 +2712,12 @@ int Kernel::set(int i, const void* value, size_t sz)
return i+1;
}
int Kernel::set(int i, const Image2D& image2D)
{
cl_mem h = (cl_mem)image2D.ptr();
return set(i, &h, sizeof(h));
}
int Kernel::set(int i, const UMat& m)
{
return set(i, KernelArg(KernelArg::READ_WRITE, (UMat*)&m, 0, 0));
@ -3785,4 +3814,151 @@ const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf)
return buf;
}
///////////////////////////////////////////////////////////////////////////////////////////////
// deviceVersion has format
// OpenCL<space><major_version.minor_version><space><vendor-specific information>
// by specification
// http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clGetDeviceInfo.html
// http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clGetDeviceInfo.html
static void parseDeviceVersion(const String &deviceVersion, int &major, int &minor)
{
major = minor = 0;
if (10 >= deviceVersion.length())
return;
const char *pstr = deviceVersion.c_str();
if (0 != strncmp(pstr, "OpenCL ", 7))
return;
size_t ppos = deviceVersion.find('.', 7);
if (String::npos == ppos)
return;
String temp = deviceVersion.substr(7, ppos - 7);
major = atoi(temp.c_str());
temp = deviceVersion.substr(ppos + 1);
minor = atoi(temp.c_str());
}
struct Image2D::Impl
{
Impl(const UMat &src)
{
handle = 0;
refcount = 1;
init(src);
}
~Impl()
{
if (handle)
clReleaseMemObject(handle);
}
void init(const UMat &src)
{
cl_image_format format;
int err;
int depth = src.depth();
int channels = src.channels();
switch(depth)
{
case CV_8U:
format.image_channel_data_type = CL_UNSIGNED_INT8;
break;
case CV_32S:
format.image_channel_data_type = CL_UNSIGNED_INT32;
break;
case CV_32F:
format.image_channel_data_type = CL_FLOAT;
break;
default:
CV_Error(-1, "Image forma is not supported");
break;
}
switch(channels)
{
case 1:
format.image_channel_order = CL_R;
break;
case 3:
format.image_channel_order = CL_RGB;
break;
case 4:
format.image_channel_order = CL_RGBA;
break;
default:
CV_Error(-1, "Image format is not supported");
break;
}
#ifdef CL_VERSION_1_2
//this enables backwards portability to
//run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
int minor, major;
parseDeviceVersion(Device::getDefault().deviceVersion(), major, minor);
if ((1 < major) || ((1 == major) && (2 <= minor)))
{
cl_image_desc desc;
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
desc.image_width = src.cols;
desc.image_height = src.rows;
desc.image_depth = 0;
desc.image_array_size = 1;
desc.image_row_pitch = 0;
desc.image_slice_pitch = 0;
desc.buffer = NULL;
desc.num_mip_levels = 0;
desc.num_samples = 0;
handle = clCreateImage((cl_context)Context2::getDefault().ptr(), CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
}
else
#endif
{
handle = clCreateImage2D((cl_context)Context2::getDefault().ptr(), CL_MEM_READ_WRITE, &format, src.cols, src.rows, 0, NULL, &err);
}
size_t origin[] = { 0, 0, 0 };
size_t region[] = { src.cols, src.rows, 1 };
cl_mem devData;
if (!src.isContinuous())
{
devData = clCreateBuffer((cl_context)Context2::getDefault().ptr(), CL_MEM_READ_ONLY, src.cols * src.rows * src.elemSize(), NULL, NULL);
const size_t roi[3] = {src.cols * src.elemSize(), src.rows, 1};
clEnqueueCopyBufferRect((cl_command_queue)Queue::getDefault().ptr(), (cl_mem)src.handle(ACCESS_READ), devData, origin, origin,
roi, src.step, 0, src.cols * src.elemSize(), 0, 0, NULL, NULL);
clFlush((cl_command_queue)Queue::getDefault().ptr());
}
else
{
devData = (cl_mem)src.handle(ACCESS_READ);
}
clEnqueueCopyBufferToImage((cl_command_queue)Queue::getDefault().ptr(), devData, handle, 0, origin, region, 0, NULL, 0);
if (!src.isContinuous())
{
clFlush((cl_command_queue)Queue::getDefault().ptr());
clReleaseMemObject(devData);
}
}
IMPLEMENT_REFCOUNTABLE();
cl_mem handle;
};
Image2D::Image2D()
{
p = NULL;
}
Image2D::Image2D(const UMat &src)
{
p = new Impl(src);
}
Image2D::~Image2D()
{
if (p)
p->release();
}
void* Image2D::ptr() const
{
return p ? p->handle : 0;
}
}}

@ -0,0 +1,102 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Fangfang Bai, fangfang@multicorewareinc.com
// Jin Ma, jin@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "perf_precomp.hpp"
#include "opencv2/ts/ocl_perf.hpp"
using std::tr1::make_tuple;
#ifdef HAVE_OPENCL
namespace cvtest {
namespace ocl {
///////////// FarnebackOpticalFlow ////////////////////////
CV_ENUM(farneFlagType, 0, OPTFLOW_FARNEBACK_GAUSSIAN)
typedef tuple< int > PyrLKOpticalFlowParams;
typedef TestBaseWithParam<PyrLKOpticalFlowParams> PyrLKOpticalFlowFixture;
OCL_PERF_TEST_P(PyrLKOpticalFlowFixture, PyrLKOpticalFlow,
::testing::Values(1000, 2000, 4000)
)
{
Mat frame0 = imread(getDataPath("gpu/opticalflow/rubberwhale1.png"), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame0.empty()) << "can't load rubberwhale1.png";
Mat frame1 = imread(getDataPath("gpu/opticalflow/rubberwhale2.png"), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame1.empty()) << "can't load rubberwhale2.png";
UMat uFrame0; frame0.copyTo(uFrame0);
UMat uFrame1; frame1.copyTo(uFrame1);
const Size winSize = Size(21, 21);
const int maxLevel = 3;
const TermCriteria criteria = TermCriteria(TermCriteria::COUNT+TermCriteria::EPS, 30, 0.01);
const int flags = 0;
const float minEigThreshold = 1e-4f;
const double eps = 1.0;
const PyrLKOpticalFlowParams params = GetParam();
const int pointsCount = get<0>(params);
vector<Point2f> pts, nextPts;
vector<unsigned char> status;
vector<float> err;
goodFeaturesToTrack(frame0, pts, pointsCount, 0.01, 0.0);
Mat ptsMat(1, static_cast<int>(pts.size()), CV_32FC2, (void *)&pts[0]);
declare.in(uFrame0, uFrame1, WARMUP_READ);
UMat uNextPts, uStatus, uErr;
OCL_TEST_CYCLE()
cv::calcOpticalFlowPyrLK(uFrame0, uFrame1, pts, uNextPts, uStatus, uErr, winSize, maxLevel, criteria, flags, minEigThreshold);
SANITY_CHECK(uNextPts, eps);
}
} } // namespace cvtest::ocl
#endif // HAVE_OPENCL

@ -43,6 +43,7 @@
#include <float.h>
#include <stdio.h>
#include "lkpyramid.hpp"
#include "opencl_kernels.hpp"
#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))
@ -590,6 +591,262 @@ int cv::buildOpticalFlowPyramid(InputArray _img, OutputArrayOfArrays pyramid, Si
return maxLevel;
}
namespace cv
{
class PyrLKOpticalFlow
{
struct dim3
{
unsigned int x, y, z;
};
public:
PyrLKOpticalFlow()
{
winSize = Size(21, 21);
maxLevel = 3;
iters = 30;
derivLambda = 0.5;
useInitialFlow = false;
}
bool checkParam()
{
iters = std::min(std::max(iters, 0), 100);
derivLambda = std::min(std::max(derivLambda, 0.0), 1.0);
if (derivLambda < 0)
return false;
if (maxLevel < 0 || winSize.width <= 2 || winSize.height <= 2)
return false;
calcPatchSize();
if (patch.x <= 0 || patch.x >= 6 || patch.y <= 0 || patch.y >= 6)
return false;
if (!initWaveSize())
return false;
return true;
}
bool sparse(const UMat &prevImg, const UMat &nextImg, const UMat &prevPts, UMat &nextPts, UMat &status, UMat &err)
{
if (!checkParam())
return false;
UMat temp1 = (useInitialFlow ? nextPts : prevPts).reshape(1);
UMat temp2 = nextPts.reshape(1);
multiply(1.0f / (1 << maxLevel) /2.0f, temp1, temp2);
status.setTo(Scalar::all(1));
// build the image pyramids.
std::vector<UMat> prevPyr; prevPyr.resize(maxLevel + 1);
std::vector<UMat> nextPyr; nextPyr.resize(maxLevel + 1);
prevImg.convertTo(prevPyr[0], CV_32F);
nextImg.convertTo(nextPyr[0], CV_32F);
for (int level = 1; level <= maxLevel; ++level)
{
pyrDown(prevPyr[level - 1], prevPyr[level]);
pyrDown(nextPyr[level - 1], nextPyr[level]);
}
// dI/dx ~ Ix, dI/dy ~ Iy
for (int level = maxLevel; level >= 0; level--)
{
if (!lkSparse_run(prevPyr[level], nextPyr[level], prevPts,
nextPts, status, err,
prevPts.cols, level))
return false;
}
return true;
}
Size winSize;
int maxLevel;
int iters;
double derivLambda;
bool useInitialFlow;
private:
int waveSize;
bool initWaveSize()
{
waveSize = 1;
if (isDeviceCPU())
return true;
ocl::Kernel kernel;
if (!kernel.create("lkSparse", cv::ocl::video::pyrlk_oclsrc, ""))
return false;
waveSize = (int)kernel.preferedWorkGroupSizeMultiple();
return true;
}
dim3 patch;
void calcPatchSize()
{
dim3 block;
if (winSize.width > 32 && winSize.width > 2 * winSize.height)
{
block.x = 32;
block.y = 8;
}
else
{
block.x = 16;
block.y = 16;
}
patch.x = (winSize.width + block.x - 1) / block.x;
patch.y = (winSize.height + block.y - 1) / block.y;
block.z = patch.z = 1;
}
#define SAFE_KERNEL_SET_ARG(idx, arg) \
{\
int idxNew = kernel.set(idx, arg);\
if (-1 == idxNew)\
{\
printf("lkSparse_run can't setup argument index = %d to kernel\n", idx);\
return false;\
}\
idx = idxNew;\
}
bool lkSparse_run(UMat &I, UMat &J, const UMat &prevPts, UMat &nextPts, UMat &status, UMat& err,
int ptcount, int level)
{
size_t localThreads[3] = { 8, 8};
size_t globalThreads[3] = { 8 * ptcount, 8};
char calcErr = (0 == level) ? 1 : 0;
cv::String build_options;
if (isDeviceCPU())
build_options = " -D CPU";
else
build_options = cv::format("-D WAVE_SIZE=%d", waveSize);
ocl::Kernel kernel;
if (!kernel.create("lkSparse", cv::ocl::video::pyrlk_oclsrc, build_options))
return false;
ocl::Image2D imageI(I);
ocl::Image2D imageJ(J);
int idxArg = 0;
#if 0
idxArg = kernel.set(idxArg, imageI); //image2d_t I
idxArg = kernel.set(idxArg, imageJ); //image2d_t J
idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(prevPts)); // __global const float2* prevPts
idxArg = kernel.set(idxArg, (int)prevPts.step); // int prevPtsStep
idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(nextPts)); // __global const float2* nextPts
idxArg = kernel.set(idxArg, (int)nextPts.step); // int nextPtsStep
idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(status)); // __global uchar* status
idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(err)); // __global float* err
idxArg = kernel.set(idxArg, (int)level); // const int level
idxArg = kernel.set(idxArg, (int)I.rows); // const int rows
idxArg = kernel.set(idxArg, (int)I.cols); // const int cols
idxArg = kernel.set(idxArg, (int)patch.x); // int PATCH_X
idxArg = kernel.set(idxArg, (int)patch.y); // int PATCH_Y
idxArg = kernel.set(idxArg, (int)winSize.width); // int c_winSize_x
idxArg = kernel.set(idxArg, (int)winSize.height); // int c_winSize_y
idxArg = kernel.set(idxArg, (int)iters); // int c_iters
idxArg = kernel.set(idxArg, (char)calcErr); //char calcErr
#else
SAFE_KERNEL_SET_ARG(idxArg, imageI); //image2d_t I
SAFE_KERNEL_SET_ARG(idxArg, imageJ); //image2d_t J
SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadOnly(prevPts)); // __global const float2* prevPts
SAFE_KERNEL_SET_ARG(idxArg, (int)prevPts.step); // int prevPtsStep
SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadWrite(nextPts)); // __global const float2* nextPts
SAFE_KERNEL_SET_ARG(idxArg, (int)nextPts.step); // int nextPtsStep
SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadWrite(status)); // __global uchar* status
SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadWrite(err)); // __global float* err
SAFE_KERNEL_SET_ARG(idxArg, (int)level); // const int level
SAFE_KERNEL_SET_ARG(idxArg, (int)I.rows); // const int rows
SAFE_KERNEL_SET_ARG(idxArg, (int)I.cols); // const int cols
SAFE_KERNEL_SET_ARG(idxArg, (int)patch.x); // int PATCH_X
SAFE_KERNEL_SET_ARG(idxArg, (int)patch.y); // int PATCH_Y
SAFE_KERNEL_SET_ARG(idxArg, (int)winSize.width); // int c_winSize_x
SAFE_KERNEL_SET_ARG(idxArg, (int)winSize.height); // int c_winSize_y
SAFE_KERNEL_SET_ARG(idxArg, (int)iters); // int c_iters
SAFE_KERNEL_SET_ARG(idxArg, (char)calcErr); //char calcErr
#endif
return kernel.run(2, globalThreads, localThreads, true);
}
private:
inline static bool isDeviceCPU()
{
return (cv::ocl::Device::TYPE_CPU == cv::ocl::Device::getDefault().type());
}
};
static bool ocl_calcOpticalFlowPyrLK(InputArray _prevImg, InputArray _nextImg,
InputArray _prevPts, InputOutputArray _nextPts,
OutputArray _status, OutputArray _err,
Size winSize, int maxLevel,
TermCriteria criteria,
int flags/*, double minEigThreshold*/ )
{
if (0 != (OPTFLOW_LK_GET_MIN_EIGENVALS & flags))
return false;
if (!cv::ocl::Device::getDefault().imageSupport())
return false;
if (_nextImg.size() != _prevImg.size())
return false;
int typePrev = _prevImg.type();
int typeNext = _nextImg.type();
if ((1 != CV_MAT_CN(typePrev)) || (1 != CV_MAT_CN(typeNext)))
return false;
if ((0 != CV_MAT_DEPTH(typePrev)) || (0 != CV_MAT_DEPTH(typeNext)))
return false;
if (_prevPts.empty() || _prevPts.type() != CV_32FC2 || (!_prevPts.isContinuous()))
return false;
if ((1 != _prevPts.size().height) && (1 != _prevPts.size().width))
return false;
size_t npoints = _prevPts.total();
bool useInitialFlow = (0 != (flags & OPTFLOW_USE_INITIAL_FLOW));
if (useInitialFlow)
{
if (_nextPts.empty() || _nextPts.type() != CV_32FC2 || (!_prevPts.isContinuous()))
return false;
if ((1 != _nextPts.size().height) && (1 != _nextPts.size().width))
return false;
if (_nextPts.total() != npoints)
return false;
}
else
{
_nextPts.create(_prevPts.size(), _prevPts.type());
}
PyrLKOpticalFlow opticalFlow;
opticalFlow.winSize = winSize;
opticalFlow.maxLevel = maxLevel;
opticalFlow.iters = criteria.maxCount;
opticalFlow.derivLambda = criteria.epsilon;
opticalFlow.useInitialFlow = useInitialFlow;
if (!opticalFlow.checkParam())
return false;
UMat umatErr;
if (_err.needed())
{
_err.create((int)npoints, 1, CV_32FC1);
umatErr = _err.getUMat();
}
else
umatErr.create((int)npoints, 1, CV_32FC1);
_status.create((int)npoints, 1, CV_8UC1);
UMat umatNextPts = _nextPts.getUMat();
UMat umatStatus = _status.getUMat();
return opticalFlow.sparse(_prevImg.getUMat(), _nextImg.getUMat(), _prevPts.getUMat(), umatNextPts, umatStatus, umatErr);
}
};
void cv::calcOpticalFlowPyrLK( InputArray _prevImg, InputArray _nextImg,
InputArray _prevPts, InputOutputArray _nextPts,
OutputArray _status, OutputArray _err,
@ -597,6 +854,10 @@ void cv::calcOpticalFlowPyrLK( InputArray _prevImg, InputArray _nextImg,
TermCriteria criteria,
int flags, double minEigThreshold )
{
bool use_opencl = ocl::useOpenCL() && (_prevImg.isUMat() || _nextImg.isUMat());
if ( use_opencl && ocl_calcOpticalFlowPyrLK(_prevImg, _nextImg, _prevPts, _nextPts, _status, _err, winSize, maxLevel, criteria, flags/*, minEigThreshold*/))
return;
Mat prevPtsMat = _prevPts.getMat();
const int derivDepth = DataType<cv::detail::deriv_type>::depth;

@ -0,0 +1,583 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Dachuan Zhao, dachuan@multicorewareinc.com
// Yao Wang, bitwangyaoyao@gmail.com
// Xiaopeng Fu, fuxiaopeng2222@163.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#define BUFFER 64
#define BUFFER2 BUFFER>>1
#ifndef WAVE_SIZE
#define WAVE_SIZE 1
#endif
#ifdef CPU
inline void reduce3(float val1, float val2, float val3, __local float* smem1, __local float* smem2, __local float* smem3, int tid)
{
smem1[tid] = val1;
smem2[tid] = val2;
smem3[tid] = val3;
barrier(CLK_LOCAL_MEM_FENCE);
for(int i = BUFFER2; i > 0; i >>= 1)
{
if(tid < i)
{
smem1[tid] += smem1[tid + i];
smem2[tid] += smem2[tid + i];
smem3[tid] += smem3[tid + i];
}
barrier(CLK_LOCAL_MEM_FENCE);
}
}
inline void reduce2(float val1, float val2, volatile __local float* smem1, volatile __local float* smem2, int tid)
{
smem1[tid] = val1;
smem2[tid] = val2;
barrier(CLK_LOCAL_MEM_FENCE);
for(int i = BUFFER2; i > 0; i >>= 1)
{
if(tid < i)
{
smem1[tid] += smem1[tid + i];
smem2[tid] += smem2[tid + i];
}
barrier(CLK_LOCAL_MEM_FENCE);
}
}
inline void reduce1(float val1, volatile __local float* smem1, int tid)
{
smem1[tid] = val1;
barrier(CLK_LOCAL_MEM_FENCE);
for(int i = BUFFER2; i > 0; i >>= 1)
{
if(tid < i)
{
smem1[tid] += smem1[tid + i];
}
barrier(CLK_LOCAL_MEM_FENCE);
}
}
#else
inline void reduce3(float val1, float val2, float val3,
__local volatile float* smem1, __local volatile float* smem2, __local volatile float* smem3, int tid)
{
smem1[tid] = val1;
smem2[tid] = val2;
smem3[tid] = val3;
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 32)
{
smem1[tid] += smem1[tid + 32];
smem2[tid] += smem2[tid + 32];
smem3[tid] += smem3[tid + 32];
#if WAVE_SIZE < 32
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 16)
{
#endif
smem1[tid] += smem1[tid + 16];
smem2[tid] += smem2[tid + 16];
smem3[tid] += smem3[tid + 16];
#if WAVE_SIZE <16
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 8)
{
#endif
smem1[tid] += smem1[tid + 8];
smem2[tid] += smem2[tid + 8];
smem3[tid] += smem3[tid + 8];
smem1[tid] += smem1[tid + 4];
smem2[tid] += smem2[tid + 4];
smem3[tid] += smem3[tid + 4];
smem1[tid] += smem1[tid + 2];
smem2[tid] += smem2[tid + 2];
smem3[tid] += smem3[tid + 2];
smem1[tid] += smem1[tid + 1];
smem2[tid] += smem2[tid + 1];
smem3[tid] += smem3[tid + 1];
}
barrier(CLK_LOCAL_MEM_FENCE);
}
inline void reduce2(float val1, float val2, __local volatile float* smem1, __local volatile float* smem2, int tid)
{
smem1[tid] = val1;
smem2[tid] = val2;
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 32)
{
smem1[tid] += smem1[tid + 32];
smem2[tid] += smem2[tid + 32];
#if WAVE_SIZE < 32
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 16)
{
#endif
smem1[tid] += smem1[tid + 16];
smem2[tid] += smem2[tid + 16];
#if WAVE_SIZE <16
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 8)
{
#endif
smem1[tid] += smem1[tid + 8];
smem2[tid] += smem2[tid + 8];
smem1[tid] += smem1[tid + 4];
smem2[tid] += smem2[tid + 4];
smem1[tid] += smem1[tid + 2];
smem2[tid] += smem2[tid + 2];
smem1[tid] += smem1[tid + 1];
smem2[tid] += smem2[tid + 1];
}
barrier(CLK_LOCAL_MEM_FENCE);
}
inline void reduce1(float val1, __local volatile float* smem1, int tid)
{
smem1[tid] = val1;
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 32)
{
smem1[tid] += smem1[tid + 32];
#if WAVE_SIZE < 32
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 16)
{
#endif
smem1[tid] += smem1[tid + 16];
#if WAVE_SIZE <16
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 8)
{
#endif
smem1[tid] += smem1[tid + 8];
smem1[tid] += smem1[tid + 4];
smem1[tid] += smem1[tid + 2];
smem1[tid] += smem1[tid + 1];
}
barrier(CLK_LOCAL_MEM_FENCE);
}
#endif
#define SCALE (1.0f / (1 << 20))
#define THRESHOLD 0.01f
// Image read mode
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;
inline void SetPatch(image2d_t I, float x, float y,
float* Pch, float* Dx, float* Dy,
float* A11, float* A12, float* A22)
{
*Pch = read_imagef(I, sampler, (float2)(x, y)).x;
float dIdx = 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x + 1, y)).x + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)).x -
(3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x - 1, y)).x + 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1)).x);
float dIdy = 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x, y + 1)).x + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)).x -
(3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x, y - 1)).x + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1)).x);
*Dx = dIdx;
*Dy = dIdy;
*A11 += dIdx * dIdx;
*A12 += dIdx * dIdy;
*A22 += dIdy * dIdy;
}
inline void GetPatch(image2d_t J, float x, float y,
float* Pch, float* Dx, float* Dy,
float* b1, float* b2)
{
float J_val = read_imagef(J, sampler, (float2)(x, y)).x;
float diff = (J_val - *Pch) * 32.0f;
*b1 += diff**Dx;
*b2 += diff**Dy;
}
inline void GetError(image2d_t J, const float x, const float y, const float* Pch, float* errval)
{
float diff = read_imagef(J, sampler, (float2)(x,y)).x-*Pch;
*errval += fabs(diff);
}
inline void SetPatch4(image2d_t I, const float x, const float y,
float4* Pch, float4* Dx, float4* Dy,
float* A11, float* A12, float* A22)
{
*Pch = read_imagef(I, sampler, (float2)(x, y));
float4 dIdx = 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1)) + 10.0f * read_imagef(I, sampler, (float2)(x + 1, y)) + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)) -
(3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)) + 10.0f * read_imagef(I, sampler, (float2)(x - 1, y)) + 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1)));
float4 dIdy = 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1)) + 10.0f * read_imagef(I, sampler, (float2)(x, y + 1)) + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)) -
(3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)) + 10.0f * read_imagef(I, sampler, (float2)(x, y - 1)) + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1)));
*Dx = dIdx;
*Dy = dIdy;
float4 sqIdx = dIdx * dIdx;
*A11 += sqIdx.x + sqIdx.y + sqIdx.z;
sqIdx = dIdx * dIdy;
*A12 += sqIdx.x + sqIdx.y + sqIdx.z;
sqIdx = dIdy * dIdy;
*A22 += sqIdx.x + sqIdx.y + sqIdx.z;
}
inline void GetPatch4(image2d_t J, const float x, const float y,
const float4* Pch, const float4* Dx, const float4* Dy,
float* b1, float* b2)
{
float4 J_val = read_imagef(J, sampler, (float2)(x, y));
float4 diff = (J_val - *Pch) * 32.0f;
float4 xdiff = diff* *Dx;
*b1 += xdiff.x + xdiff.y + xdiff.z;
xdiff = diff* *Dy;
*b2 += xdiff.x + xdiff.y + xdiff.z;
}
inline void GetError4(image2d_t J, const float x, const float y, const float4* Pch, float* errval)
{
float4 diff = read_imagef(J, sampler, (float2)(x,y))-*Pch;
*errval += fabs(diff.x) + fabs(diff.y) + fabs(diff.z);
}
#define GRIDSIZE 3
__kernel void lkSparse(image2d_t I, image2d_t J,
__global const float2* prevPts, int prevPtsStep, __global float2* nextPts, int nextPtsStep, __global uchar* status, __global float* err,
const int level, const int rows, const int cols, int PATCH_X, int PATCH_Y, int c_winSize_x, int c_winSize_y, int c_iters, char calcErr)
{
__local float smem1[BUFFER];
__local float smem2[BUFFER];
__local float smem3[BUFFER];
unsigned int xid=get_local_id(0);
unsigned int yid=get_local_id(1);
unsigned int gid=get_group_id(0);
unsigned int xsize=get_local_size(0);
unsigned int ysize=get_local_size(1);
int xBase, yBase, k;
float2 c_halfWin = (float2)((c_winSize_x - 1)>>1, (c_winSize_y - 1)>>1);
const int tid = mad24(yid, xsize, xid);
float2 prevPt = prevPts[gid] / (float2)(1 << level);
if (prevPt.x < 0 || prevPt.x >= cols || prevPt.y < 0 || prevPt.y >= rows)
{
if (tid == 0 && level == 0)
{
status[gid] = 0;
}
return;
}
prevPt -= c_halfWin;
// extract the patch from the first image, compute covariation matrix of derivatives
float A11 = 0;
float A12 = 0;
float A22 = 0;
float I_patch[GRIDSIZE][GRIDSIZE];
float dIdx_patch[GRIDSIZE][GRIDSIZE];
float dIdy_patch[GRIDSIZE][GRIDSIZE];
yBase=yid;
{
xBase=xid;
SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
&I_patch[0][0], &dIdx_patch[0][0], &dIdy_patch[0][0],
&A11, &A12, &A22);
xBase+=xsize;
SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
&I_patch[0][1], &dIdx_patch[0][1], &dIdy_patch[0][1],
&A11, &A12, &A22);
xBase+=xsize;
if(xBase<c_winSize_x)
SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
&I_patch[0][2], &dIdx_patch[0][2], &dIdy_patch[0][2],
&A11, &A12, &A22);
}
yBase+=ysize;
{
xBase=xid;
SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
&I_patch[1][0], &dIdx_patch[1][0], &dIdy_patch[1][0],
&A11, &A12, &A22);
xBase+=xsize;
SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
&I_patch[1][1], &dIdx_patch[1][1], &dIdy_patch[1][1],
&A11, &A12, &A22);
xBase+=xsize;
if(xBase<c_winSize_x)
SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
&I_patch[1][2], &dIdx_patch[1][2], &dIdy_patch[1][2],
&A11, &A12, &A22);
}
yBase+=ysize;
if(yBase<c_winSize_y)
{
xBase=xid;
SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
&I_patch[2][0], &dIdx_patch[2][0], &dIdy_patch[2][0],
&A11, &A12, &A22);
xBase+=xsize;
SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
&I_patch[2][1], &dIdx_patch[2][1], &dIdy_patch[2][1],
&A11, &A12, &A22);
xBase+=xsize;
if(xBase<c_winSize_x)
SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
&I_patch[2][2], &dIdx_patch[2][2], &dIdy_patch[2][2],
&A11, &A12, &A22);
}
reduce3(A11, A12, A22, smem1, smem2, smem3, tid);
A11 = smem1[0];
A12 = smem2[0];
A22 = smem3[0];
barrier(CLK_LOCAL_MEM_FENCE);
float D = A11 * A22 - A12 * A12;
if (D < 1.192092896e-07f)
{
if (tid == 0 && level == 0)
status[gid] = 0;
return;
}
A11 /= D;
A12 /= D;
A22 /= D;
prevPt = nextPts[gid] * 2.0f - c_halfWin;
for (k = 0; k < c_iters; ++k)
{
if (prevPt.x < -c_halfWin.x || prevPt.x >= cols || prevPt.y < -c_halfWin.y || prevPt.y >= rows)
{
if (tid == 0 && level == 0)
status[gid] = 0;
return;
}
float b1 = 0;
float b2 = 0;
yBase=yid;
{
xBase=xid;
GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
&I_patch[0][0], &dIdx_patch[0][0], &dIdy_patch[0][0],
&b1, &b2);
xBase+=xsize;
GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
&I_patch[0][1], &dIdx_patch[0][1], &dIdy_patch[0][1],
&b1, &b2);
xBase+=xsize;
if(xBase<c_winSize_x)
GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
&I_patch[0][2], &dIdx_patch[0][2], &dIdy_patch[0][2],
&b1, &b2);
}
yBase+=ysize;
{
xBase=xid;
GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
&I_patch[1][0], &dIdx_patch[1][0], &dIdy_patch[1][0],
&b1, &b2);
xBase+=xsize;
GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
&I_patch[1][1], &dIdx_patch[1][1], &dIdy_patch[1][1],
&b1, &b2);
xBase+=xsize;
if(xBase<c_winSize_x)
GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
&I_patch[1][2], &dIdx_patch[1][2], &dIdy_patch[1][2],
&b1, &b2);
}
yBase+=ysize;
if(yBase<c_winSize_y)
{
xBase=xid;
GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
&I_patch[2][0], &dIdx_patch[2][0], &dIdy_patch[2][0],
&b1, &b2);
xBase+=xsize;
GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
&I_patch[2][1], &dIdx_patch[2][1], &dIdy_patch[2][1],
&b1, &b2);
xBase+=xsize;
if(xBase<c_winSize_x)
GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
&I_patch[2][2], &dIdx_patch[2][2], &dIdy_patch[2][2],
&b1, &b2);
}
reduce2(b1, b2, smem1, smem2, tid);
b1 = smem1[0];
b2 = smem2[0];
barrier(CLK_LOCAL_MEM_FENCE);
float2 delta;
delta.x = A12 * b2 - A22 * b1;
delta.y = A12 * b1 - A11 * b2;
prevPt += delta;
if (fabs(delta.x) < THRESHOLD && fabs(delta.y) < THRESHOLD)
break;
}
D = 0.0f;
if (calcErr)
{
yBase=yid;
{
xBase=xid;
GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
&I_patch[0][0], &D);
xBase+=xsize;
GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
&I_patch[0][1], &D);
xBase+=xsize;
if(xBase<c_winSize_x)
GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
&I_patch[0][2], &D);
}
yBase+=ysize;
{
xBase=xid;
GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
&I_patch[1][0], &D);
xBase+=xsize;
GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
&I_patch[1][1], &D);
xBase+=xsize;
if(xBase<c_winSize_x)
GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
&I_patch[1][2], &D);
}
yBase+=ysize;
if(yBase<c_winSize_y)
{
xBase=xid;
GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
&I_patch[2][0], &D);
xBase+=xsize;
GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
&I_patch[2][1], &D);
xBase+=xsize;
if(xBase<c_winSize_x)
GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
&I_patch[2][2], &D);
}
reduce1(D, smem1, tid);
}
if (tid == 0)
{
prevPt += c_halfWin;
nextPts[gid] = prevPt;
if (calcErr)
err[gid] = smem1[0] / (float)(c_winSize_x * c_winSize_y);
}
}

@ -0,0 +1,143 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
#include "opencv2/ts/ocl_test.hpp"
#ifdef HAVE_OPENCL
namespace cvtest {
namespace ocl {
/////////////////////////////////////////////////////////////////////////////////////////////////
// PyrLKOpticalFlow
PARAM_TEST_CASE(PyrLKOpticalFlow, int, int)
{
Size winSize;
int maxLevel;
TermCriteria criteria;
int flags;
double minEigThreshold;
virtual void SetUp()
{
winSize = Size(GET_PARAM(0), GET_PARAM(0));
maxLevel = GET_PARAM(1);
criteria = TermCriteria(TermCriteria::COUNT+TermCriteria::EPS, 30, 0.01);
flags = 0;
minEigThreshold = 1e-4f;
}
};
OCL_TEST_P(PyrLKOpticalFlow, Mat)
{
static const int npoints = 1000;
static const float eps = 0.03f;
cv::Mat frame0 = readImage("optflow/RubberWhale1.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame0.empty());
UMat umatFrame0; frame0.copyTo(umatFrame0);
cv::Mat frame1 = readImage("optflow/RubberWhale2.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame1.empty());
UMat umatFrame1; frame1.copyTo(umatFrame1);
std::vector<cv::Point2f> pts;
cv::goodFeaturesToTrack(frame0, pts, npoints, 0.01, 0.0);
std::vector<cv::Point2f> cpuNextPts;
std::vector<unsigned char> cpuStatusCPU;
std::vector<float> cpuErr;
OCL_OFF(cv::calcOpticalFlowPyrLK(frame0, frame1, pts, cpuNextPts, cpuStatusCPU, cpuErr, winSize, maxLevel, criteria, flags, minEigThreshold));
UMat umatNextPts, umatStatus, umatErr;
OCL_ON(cv::calcOpticalFlowPyrLK(umatFrame0, umatFrame1, pts, umatNextPts, umatStatus, umatErr, winSize, maxLevel, criteria, flags, minEigThreshold));
std::vector<cv::Point2f> nextPts; umatNextPts.reshape(2, 1).copyTo(nextPts);
std::vector<unsigned char> status; umatStatus.reshape(1, 1).copyTo(status);
std::vector<float> err; umatErr.reshape(1, 1).copyTo(err);
ASSERT_EQ(cpuNextPts.size(), nextPts.size());
ASSERT_EQ(cpuStatusCPU.size(), status.size());
size_t mistmatch = 0;
for (size_t i = 0; i < nextPts.size(); ++i)
{
if (status[i] != cpuStatusCPU[i])
{
++mistmatch;
continue;
}
if (status[i])
{
cv::Point2i a = nextPts[i];
cv::Point2i b = cpuNextPts[i];
bool eq = std::abs(a.x - b.x) < 1 && std::abs(a.y - b.y) < 1;
float errdiff = 0.0f;
if (!eq || errdiff > 1e-1)
++mistmatch;
}
}
double bad_ratio = static_cast<double>(mistmatch) / (nextPts.size());
ASSERT_LE(bad_ratio, eps);
}
OCL_INSTANTIATE_TEST_CASE_P(Video, PyrLKOpticalFlow,
Combine(
Values(21, 25),
Values(3, 5)
)
);
} } // namespace cvtest::ocl
#endif // HAVE_OPENCL