Merge pull request #2155 from vbystricky:ocl_calcOpticalFlowPyrLK
This commit is contained in:
commit
a64d3c1744
modules
core
video
@ -60,6 +60,7 @@ class CV_EXPORTS Program;
|
||||
class CV_EXPORTS ProgramSource2;
|
||||
class CV_EXPORTS Queue;
|
||||
class CV_EXPORTS PlatformInfo2;
|
||||
class CV_EXPORTS Image2D;
|
||||
|
||||
class CV_EXPORTS Device
|
||||
{
|
||||
@ -89,6 +90,7 @@ public:
|
||||
String vendor() const;
|
||||
String OpenCL_C_Version() const;
|
||||
String OpenCLVersion() const;
|
||||
String deviceVersion() const;
|
||||
String driverVersion() const;
|
||||
void* ptr() const;
|
||||
|
||||
@ -325,6 +327,7 @@ public:
|
||||
const String& buildopts, String* errmsg=0);
|
||||
|
||||
int set(int i, const void* value, size_t sz);
|
||||
int set(int i, const Image2D& image2D);
|
||||
int set(int i, const UMat& m);
|
||||
int set(int i, const KernelArg& arg);
|
||||
template<typename _Tp> int set(int i, const _Tp& value)
|
||||
@ -574,6 +577,19 @@ CV_EXPORTS const char* typeToStr(int t);
|
||||
CV_EXPORTS const char* memopTypeToStr(int t);
|
||||
CV_EXPORTS void getPlatfomsInfo(std::vector<PlatformInfo2>& platform_info);
|
||||
|
||||
class CV_EXPORTS Image2D
|
||||
{
|
||||
public:
|
||||
Image2D();
|
||||
Image2D(const UMat &src);
|
||||
~Image2D();
|
||||
|
||||
void* ptr() const;
|
||||
protected:
|
||||
struct Impl;
|
||||
Impl* p;
|
||||
};
|
||||
|
||||
}}
|
||||
|
||||
#endif
|
||||
|
@ -821,6 +821,7 @@ OCL_FUNC_P(cl_mem, clCreateSubBuffer,
|
||||
const void * buffer_create_info,
|
||||
cl_int * errcode_ret),
|
||||
(buffer, flags, buffer_create_type, buffer_create_info, errcode_ret))
|
||||
*/
|
||||
|
||||
OCL_FUNC_P(cl_mem, clCreateImage,
|
||||
(cl_context context,
|
||||
@ -831,6 +832,18 @@ OCL_FUNC_P(cl_mem, clCreateImage,
|
||||
cl_int * errcode_ret),
|
||||
(context, flags, image_format, image_desc, host_ptr, errcode_ret))
|
||||
|
||||
OCL_FUNC_P(cl_mem, clCreateImage2D,
|
||||
(cl_context context,
|
||||
cl_mem_flags flags,
|
||||
const cl_image_format * image_format,
|
||||
size_t image_width,
|
||||
size_t image_height,
|
||||
size_t image_row_pitch,
|
||||
void * host_ptr,
|
||||
cl_int *errcode_ret),
|
||||
(context, flags, image_format, image_width, image_height, image_row_pitch, host_ptr, errcode_ret))
|
||||
|
||||
/*
|
||||
OCL_FUNC(cl_int, clGetSupportedImageFormats,
|
||||
(cl_context context,
|
||||
cl_mem_flags flags,
|
||||
@ -945,21 +958,26 @@ OCL_FUNC(cl_int, clEnqueueCopyImageToBuffer,
|
||||
cl_event * event),
|
||||
(command_queue, src_image, dst_buffer, src_origin, region, dst_offset,
|
||||
num_events_in_wait_list, event_wait_list, event))
|
||||
*/
|
||||
|
||||
OCL_FUNC(cl_int, clEnqueueCopyBufferToImage,
|
||||
(cl_command_queue command_queue,
|
||||
cl_mem src_buffer,
|
||||
cl_mem dst_image,
|
||||
size_t src_offset,
|
||||
const size_t * dst_origin[3],
|
||||
const size_t * region[3],
|
||||
const size_t dst_origin[3],
|
||||
const size_t region[3],
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event),
|
||||
(command_queue, src_buffer, dst_image, src_offset, dst_origin,
|
||||
region, num_events_in_wait_list, event_wait_list, event))
|
||||
|
||||
OCL_FUNC(cl_int, clFlush,
|
||||
(cl_command_queue command_queue),
|
||||
(command_queue))
|
||||
|
||||
/*
|
||||
OCL_FUNC_P(void*, clEnqueueMapImage,
|
||||
(cl_command_queue command_queue,
|
||||
cl_mem image,
|
||||
@ -976,7 +994,9 @@ OCL_FUNC_P(void*, clEnqueueMapImage,
|
||||
(command_queue, image, blocking_map, map_flags, origin, region,
|
||||
image_row_pitch, image_slice_pitch, num_events_in_wait_list,
|
||||
event_wait_list, event, errcode_ret))
|
||||
*/
|
||||
|
||||
/*
|
||||
OCL_FUNC(cl_int, clRetainProgram, (cl_program program), (program))
|
||||
|
||||
OCL_FUNC(cl_int, clGetKernelInfo,
|
||||
@ -1705,6 +1725,9 @@ String Device::OpenCL_C_Version() const
|
||||
String Device::OpenCLVersion() const
|
||||
{ return p ? p->getStrProp(CL_DEVICE_EXTENSIONS) : String(); }
|
||||
|
||||
String Device::deviceVersion() const
|
||||
{ return p ? p->getStrProp(CL_DEVICE_VERSION) : String(); }
|
||||
|
||||
String Device::driverVersion() const
|
||||
{ return p ? p->getStrProp(CL_DRIVER_VERSION) : String(); }
|
||||
|
||||
@ -2689,6 +2712,12 @@ int Kernel::set(int i, const void* value, size_t sz)
|
||||
return i+1;
|
||||
}
|
||||
|
||||
int Kernel::set(int i, const Image2D& image2D)
|
||||
{
|
||||
cl_mem h = (cl_mem)image2D.ptr();
|
||||
return set(i, &h, sizeof(h));
|
||||
}
|
||||
|
||||
int Kernel::set(int i, const UMat& m)
|
||||
{
|
||||
return set(i, KernelArg(KernelArg::READ_WRITE, (UMat*)&m, 0, 0));
|
||||
@ -3785,4 +3814,151 @@ const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf)
|
||||
return buf;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// deviceVersion has format
|
||||
// OpenCL<space><major_version.minor_version><space><vendor-specific information>
|
||||
// by specification
|
||||
// http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clGetDeviceInfo.html
|
||||
// http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clGetDeviceInfo.html
|
||||
static void parseDeviceVersion(const String &deviceVersion, int &major, int &minor)
|
||||
{
|
||||
major = minor = 0;
|
||||
if (10 >= deviceVersion.length())
|
||||
return;
|
||||
const char *pstr = deviceVersion.c_str();
|
||||
if (0 != strncmp(pstr, "OpenCL ", 7))
|
||||
return;
|
||||
size_t ppos = deviceVersion.find('.', 7);
|
||||
if (String::npos == ppos)
|
||||
return;
|
||||
String temp = deviceVersion.substr(7, ppos - 7);
|
||||
major = atoi(temp.c_str());
|
||||
temp = deviceVersion.substr(ppos + 1);
|
||||
minor = atoi(temp.c_str());
|
||||
}
|
||||
|
||||
struct Image2D::Impl
|
||||
{
|
||||
Impl(const UMat &src)
|
||||
{
|
||||
handle = 0;
|
||||
refcount = 1;
|
||||
init(src);
|
||||
}
|
||||
~Impl()
|
||||
{
|
||||
if (handle)
|
||||
clReleaseMemObject(handle);
|
||||
}
|
||||
void init(const UMat &src)
|
||||
{
|
||||
cl_image_format format;
|
||||
int err;
|
||||
int depth = src.depth();
|
||||
int channels = src.channels();
|
||||
|
||||
switch(depth)
|
||||
{
|
||||
case CV_8U:
|
||||
format.image_channel_data_type = CL_UNSIGNED_INT8;
|
||||
break;
|
||||
case CV_32S:
|
||||
format.image_channel_data_type = CL_UNSIGNED_INT32;
|
||||
break;
|
||||
case CV_32F:
|
||||
format.image_channel_data_type = CL_FLOAT;
|
||||
break;
|
||||
default:
|
||||
CV_Error(-1, "Image forma is not supported");
|
||||
break;
|
||||
}
|
||||
switch(channels)
|
||||
{
|
||||
case 1:
|
||||
format.image_channel_order = CL_R;
|
||||
break;
|
||||
case 3:
|
||||
format.image_channel_order = CL_RGB;
|
||||
break;
|
||||
case 4:
|
||||
format.image_channel_order = CL_RGBA;
|
||||
break;
|
||||
default:
|
||||
CV_Error(-1, "Image format is not supported");
|
||||
break;
|
||||
}
|
||||
#ifdef CL_VERSION_1_2
|
||||
//this enables backwards portability to
|
||||
//run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
|
||||
int minor, major;
|
||||
parseDeviceVersion(Device::getDefault().deviceVersion(), major, minor);
|
||||
if ((1 < major) || ((1 == major) && (2 <= minor)))
|
||||
{
|
||||
cl_image_desc desc;
|
||||
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
|
||||
desc.image_width = src.cols;
|
||||
desc.image_height = src.rows;
|
||||
desc.image_depth = 0;
|
||||
desc.image_array_size = 1;
|
||||
desc.image_row_pitch = 0;
|
||||
desc.image_slice_pitch = 0;
|
||||
desc.buffer = NULL;
|
||||
desc.num_mip_levels = 0;
|
||||
desc.num_samples = 0;
|
||||
handle = clCreateImage((cl_context)Context2::getDefault().ptr(), CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
handle = clCreateImage2D((cl_context)Context2::getDefault().ptr(), CL_MEM_READ_WRITE, &format, src.cols, src.rows, 0, NULL, &err);
|
||||
}
|
||||
size_t origin[] = { 0, 0, 0 };
|
||||
size_t region[] = { src.cols, src.rows, 1 };
|
||||
|
||||
cl_mem devData;
|
||||
if (!src.isContinuous())
|
||||
{
|
||||
devData = clCreateBuffer((cl_context)Context2::getDefault().ptr(), CL_MEM_READ_ONLY, src.cols * src.rows * src.elemSize(), NULL, NULL);
|
||||
const size_t roi[3] = {src.cols * src.elemSize(), src.rows, 1};
|
||||
clEnqueueCopyBufferRect((cl_command_queue)Queue::getDefault().ptr(), (cl_mem)src.handle(ACCESS_READ), devData, origin, origin,
|
||||
roi, src.step, 0, src.cols * src.elemSize(), 0, 0, NULL, NULL);
|
||||
clFlush((cl_command_queue)Queue::getDefault().ptr());
|
||||
}
|
||||
else
|
||||
{
|
||||
devData = (cl_mem)src.handle(ACCESS_READ);
|
||||
}
|
||||
|
||||
clEnqueueCopyBufferToImage((cl_command_queue)Queue::getDefault().ptr(), devData, handle, 0, origin, region, 0, NULL, 0);
|
||||
if (!src.isContinuous())
|
||||
{
|
||||
clFlush((cl_command_queue)Queue::getDefault().ptr());
|
||||
clReleaseMemObject(devData);
|
||||
}
|
||||
}
|
||||
|
||||
IMPLEMENT_REFCOUNTABLE();
|
||||
|
||||
cl_mem handle;
|
||||
};
|
||||
|
||||
Image2D::Image2D()
|
||||
{
|
||||
p = NULL;
|
||||
}
|
||||
Image2D::Image2D(const UMat &src)
|
||||
{
|
||||
p = new Impl(src);
|
||||
}
|
||||
Image2D::~Image2D()
|
||||
{
|
||||
if (p)
|
||||
p->release();
|
||||
}
|
||||
|
||||
void* Image2D::ptr() const
|
||||
{
|
||||
return p ? p->handle : 0;
|
||||
}
|
||||
|
||||
}}
|
||||
|
102
modules/video/perf/opencl/perf_optflow_pyrlk.cpp
Normal file
102
modules/video/perf/opencl/perf_optflow_pyrlk.cpp
Normal file
@ -0,0 +1,102 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Fangfang Bai, fangfang@multicorewareinc.com
|
||||
// Jin Ma, jin@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "perf_precomp.hpp"
|
||||
#include "opencv2/ts/ocl_perf.hpp"
|
||||
|
||||
using std::tr1::make_tuple;
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
||||
namespace cvtest {
|
||||
namespace ocl {
|
||||
|
||||
///////////// FarnebackOpticalFlow ////////////////////////
|
||||
CV_ENUM(farneFlagType, 0, OPTFLOW_FARNEBACK_GAUSSIAN)
|
||||
|
||||
typedef tuple< int > PyrLKOpticalFlowParams;
|
||||
typedef TestBaseWithParam<PyrLKOpticalFlowParams> PyrLKOpticalFlowFixture;
|
||||
|
||||
OCL_PERF_TEST_P(PyrLKOpticalFlowFixture, PyrLKOpticalFlow,
|
||||
::testing::Values(1000, 2000, 4000)
|
||||
)
|
||||
{
|
||||
Mat frame0 = imread(getDataPath("gpu/opticalflow/rubberwhale1.png"), cv::IMREAD_GRAYSCALE);
|
||||
ASSERT_FALSE(frame0.empty()) << "can't load rubberwhale1.png";
|
||||
|
||||
Mat frame1 = imread(getDataPath("gpu/opticalflow/rubberwhale2.png"), cv::IMREAD_GRAYSCALE);
|
||||
ASSERT_FALSE(frame1.empty()) << "can't load rubberwhale2.png";
|
||||
|
||||
UMat uFrame0; frame0.copyTo(uFrame0);
|
||||
UMat uFrame1; frame1.copyTo(uFrame1);
|
||||
|
||||
const Size winSize = Size(21, 21);
|
||||
const int maxLevel = 3;
|
||||
const TermCriteria criteria = TermCriteria(TermCriteria::COUNT+TermCriteria::EPS, 30, 0.01);
|
||||
const int flags = 0;
|
||||
const float minEigThreshold = 1e-4f;
|
||||
const double eps = 1.0;
|
||||
|
||||
const PyrLKOpticalFlowParams params = GetParam();
|
||||
const int pointsCount = get<0>(params);
|
||||
|
||||
vector<Point2f> pts, nextPts;
|
||||
vector<unsigned char> status;
|
||||
vector<float> err;
|
||||
goodFeaturesToTrack(frame0, pts, pointsCount, 0.01, 0.0);
|
||||
Mat ptsMat(1, static_cast<int>(pts.size()), CV_32FC2, (void *)&pts[0]);
|
||||
|
||||
declare.in(uFrame0, uFrame1, WARMUP_READ);
|
||||
UMat uNextPts, uStatus, uErr;
|
||||
OCL_TEST_CYCLE()
|
||||
cv::calcOpticalFlowPyrLK(uFrame0, uFrame1, pts, uNextPts, uStatus, uErr, winSize, maxLevel, criteria, flags, minEigThreshold);
|
||||
|
||||
SANITY_CHECK(uNextPts, eps);
|
||||
}
|
||||
|
||||
} } // namespace cvtest::ocl
|
||||
|
||||
#endif // HAVE_OPENCL
|
@ -43,6 +43,7 @@
|
||||
#include <float.h>
|
||||
#include <stdio.h>
|
||||
#include "lkpyramid.hpp"
|
||||
#include "opencl_kernels.hpp"
|
||||
|
||||
#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))
|
||||
|
||||
@ -590,6 +591,262 @@ int cv::buildOpticalFlowPyramid(InputArray _img, OutputArrayOfArrays pyramid, Si
|
||||
return maxLevel;
|
||||
}
|
||||
|
||||
namespace cv
|
||||
{
|
||||
class PyrLKOpticalFlow
|
||||
{
|
||||
struct dim3
|
||||
{
|
||||
unsigned int x, y, z;
|
||||
};
|
||||
public:
|
||||
PyrLKOpticalFlow()
|
||||
{
|
||||
winSize = Size(21, 21);
|
||||
maxLevel = 3;
|
||||
iters = 30;
|
||||
derivLambda = 0.5;
|
||||
useInitialFlow = false;
|
||||
}
|
||||
|
||||
bool checkParam()
|
||||
{
|
||||
iters = std::min(std::max(iters, 0), 100);
|
||||
|
||||
derivLambda = std::min(std::max(derivLambda, 0.0), 1.0);
|
||||
if (derivLambda < 0)
|
||||
return false;
|
||||
if (maxLevel < 0 || winSize.width <= 2 || winSize.height <= 2)
|
||||
return false;
|
||||
calcPatchSize();
|
||||
if (patch.x <= 0 || patch.x >= 6 || patch.y <= 0 || patch.y >= 6)
|
||||
return false;
|
||||
if (!initWaveSize())
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool sparse(const UMat &prevImg, const UMat &nextImg, const UMat &prevPts, UMat &nextPts, UMat &status, UMat &err)
|
||||
{
|
||||
if (!checkParam())
|
||||
return false;
|
||||
|
||||
UMat temp1 = (useInitialFlow ? nextPts : prevPts).reshape(1);
|
||||
UMat temp2 = nextPts.reshape(1);
|
||||
multiply(1.0f / (1 << maxLevel) /2.0f, temp1, temp2);
|
||||
|
||||
status.setTo(Scalar::all(1));
|
||||
|
||||
// build the image pyramids.
|
||||
std::vector<UMat> prevPyr; prevPyr.resize(maxLevel + 1);
|
||||
std::vector<UMat> nextPyr; nextPyr.resize(maxLevel + 1);
|
||||
|
||||
prevImg.convertTo(prevPyr[0], CV_32F);
|
||||
nextImg.convertTo(nextPyr[0], CV_32F);
|
||||
|
||||
for (int level = 1; level <= maxLevel; ++level)
|
||||
{
|
||||
pyrDown(prevPyr[level - 1], prevPyr[level]);
|
||||
pyrDown(nextPyr[level - 1], nextPyr[level]);
|
||||
}
|
||||
|
||||
// dI/dx ~ Ix, dI/dy ~ Iy
|
||||
for (int level = maxLevel; level >= 0; level--)
|
||||
{
|
||||
if (!lkSparse_run(prevPyr[level], nextPyr[level], prevPts,
|
||||
nextPts, status, err,
|
||||
prevPts.cols, level))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
Size winSize;
|
||||
int maxLevel;
|
||||
int iters;
|
||||
double derivLambda;
|
||||
bool useInitialFlow;
|
||||
|
||||
private:
|
||||
int waveSize;
|
||||
bool initWaveSize()
|
||||
{
|
||||
waveSize = 1;
|
||||
if (isDeviceCPU())
|
||||
return true;
|
||||
|
||||
ocl::Kernel kernel;
|
||||
if (!kernel.create("lkSparse", cv::ocl::video::pyrlk_oclsrc, ""))
|
||||
return false;
|
||||
waveSize = (int)kernel.preferedWorkGroupSizeMultiple();
|
||||
return true;
|
||||
}
|
||||
dim3 patch;
|
||||
void calcPatchSize()
|
||||
{
|
||||
dim3 block;
|
||||
|
||||
if (winSize.width > 32 && winSize.width > 2 * winSize.height)
|
||||
{
|
||||
block.x = 32;
|
||||
block.y = 8;
|
||||
}
|
||||
else
|
||||
{
|
||||
block.x = 16;
|
||||
block.y = 16;
|
||||
}
|
||||
|
||||
patch.x = (winSize.width + block.x - 1) / block.x;
|
||||
patch.y = (winSize.height + block.y - 1) / block.y;
|
||||
|
||||
block.z = patch.z = 1;
|
||||
}
|
||||
|
||||
#define SAFE_KERNEL_SET_ARG(idx, arg) \
|
||||
{\
|
||||
int idxNew = kernel.set(idx, arg);\
|
||||
if (-1 == idxNew)\
|
||||
{\
|
||||
printf("lkSparse_run can't setup argument index = %d to kernel\n", idx);\
|
||||
return false;\
|
||||
}\
|
||||
idx = idxNew;\
|
||||
}
|
||||
bool lkSparse_run(UMat &I, UMat &J, const UMat &prevPts, UMat &nextPts, UMat &status, UMat& err,
|
||||
int ptcount, int level)
|
||||
{
|
||||
size_t localThreads[3] = { 8, 8};
|
||||
size_t globalThreads[3] = { 8 * ptcount, 8};
|
||||
char calcErr = (0 == level) ? 1 : 0;
|
||||
|
||||
cv::String build_options;
|
||||
if (isDeviceCPU())
|
||||
build_options = " -D CPU";
|
||||
else
|
||||
build_options = cv::format("-D WAVE_SIZE=%d", waveSize);
|
||||
|
||||
ocl::Kernel kernel;
|
||||
if (!kernel.create("lkSparse", cv::ocl::video::pyrlk_oclsrc, build_options))
|
||||
return false;
|
||||
|
||||
ocl::Image2D imageI(I);
|
||||
ocl::Image2D imageJ(J);
|
||||
int idxArg = 0;
|
||||
#if 0
|
||||
idxArg = kernel.set(idxArg, imageI); //image2d_t I
|
||||
idxArg = kernel.set(idxArg, imageJ); //image2d_t J
|
||||
idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(prevPts)); // __global const float2* prevPts
|
||||
idxArg = kernel.set(idxArg, (int)prevPts.step); // int prevPtsStep
|
||||
idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(nextPts)); // __global const float2* nextPts
|
||||
idxArg = kernel.set(idxArg, (int)nextPts.step); // int nextPtsStep
|
||||
idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(status)); // __global uchar* status
|
||||
idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(err)); // __global float* err
|
||||
idxArg = kernel.set(idxArg, (int)level); // const int level
|
||||
idxArg = kernel.set(idxArg, (int)I.rows); // const int rows
|
||||
idxArg = kernel.set(idxArg, (int)I.cols); // const int cols
|
||||
idxArg = kernel.set(idxArg, (int)patch.x); // int PATCH_X
|
||||
idxArg = kernel.set(idxArg, (int)patch.y); // int PATCH_Y
|
||||
idxArg = kernel.set(idxArg, (int)winSize.width); // int c_winSize_x
|
||||
idxArg = kernel.set(idxArg, (int)winSize.height); // int c_winSize_y
|
||||
idxArg = kernel.set(idxArg, (int)iters); // int c_iters
|
||||
idxArg = kernel.set(idxArg, (char)calcErr); //char calcErr
|
||||
#else
|
||||
SAFE_KERNEL_SET_ARG(idxArg, imageI); //image2d_t I
|
||||
SAFE_KERNEL_SET_ARG(idxArg, imageJ); //image2d_t J
|
||||
SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadOnly(prevPts)); // __global const float2* prevPts
|
||||
SAFE_KERNEL_SET_ARG(idxArg, (int)prevPts.step); // int prevPtsStep
|
||||
SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadWrite(nextPts)); // __global const float2* nextPts
|
||||
SAFE_KERNEL_SET_ARG(idxArg, (int)nextPts.step); // int nextPtsStep
|
||||
SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadWrite(status)); // __global uchar* status
|
||||
SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadWrite(err)); // __global float* err
|
||||
SAFE_KERNEL_SET_ARG(idxArg, (int)level); // const int level
|
||||
SAFE_KERNEL_SET_ARG(idxArg, (int)I.rows); // const int rows
|
||||
SAFE_KERNEL_SET_ARG(idxArg, (int)I.cols); // const int cols
|
||||
SAFE_KERNEL_SET_ARG(idxArg, (int)patch.x); // int PATCH_X
|
||||
SAFE_KERNEL_SET_ARG(idxArg, (int)patch.y); // int PATCH_Y
|
||||
SAFE_KERNEL_SET_ARG(idxArg, (int)winSize.width); // int c_winSize_x
|
||||
SAFE_KERNEL_SET_ARG(idxArg, (int)winSize.height); // int c_winSize_y
|
||||
SAFE_KERNEL_SET_ARG(idxArg, (int)iters); // int c_iters
|
||||
SAFE_KERNEL_SET_ARG(idxArg, (char)calcErr); //char calcErr
|
||||
#endif
|
||||
|
||||
return kernel.run(2, globalThreads, localThreads, true);
|
||||
}
|
||||
private:
|
||||
inline static bool isDeviceCPU()
|
||||
{
|
||||
return (cv::ocl::Device::TYPE_CPU == cv::ocl::Device::getDefault().type());
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
static bool ocl_calcOpticalFlowPyrLK(InputArray _prevImg, InputArray _nextImg,
|
||||
InputArray _prevPts, InputOutputArray _nextPts,
|
||||
OutputArray _status, OutputArray _err,
|
||||
Size winSize, int maxLevel,
|
||||
TermCriteria criteria,
|
||||
int flags/*, double minEigThreshold*/ )
|
||||
{
|
||||
if (0 != (OPTFLOW_LK_GET_MIN_EIGENVALS & flags))
|
||||
return false;
|
||||
if (!cv::ocl::Device::getDefault().imageSupport())
|
||||
return false;
|
||||
if (_nextImg.size() != _prevImg.size())
|
||||
return false;
|
||||
int typePrev = _prevImg.type();
|
||||
int typeNext = _nextImg.type();
|
||||
if ((1 != CV_MAT_CN(typePrev)) || (1 != CV_MAT_CN(typeNext)))
|
||||
return false;
|
||||
if ((0 != CV_MAT_DEPTH(typePrev)) || (0 != CV_MAT_DEPTH(typeNext)))
|
||||
return false;
|
||||
|
||||
if (_prevPts.empty() || _prevPts.type() != CV_32FC2 || (!_prevPts.isContinuous()))
|
||||
return false;
|
||||
if ((1 != _prevPts.size().height) && (1 != _prevPts.size().width))
|
||||
return false;
|
||||
size_t npoints = _prevPts.total();
|
||||
bool useInitialFlow = (0 != (flags & OPTFLOW_USE_INITIAL_FLOW));
|
||||
if (useInitialFlow)
|
||||
{
|
||||
if (_nextPts.empty() || _nextPts.type() != CV_32FC2 || (!_prevPts.isContinuous()))
|
||||
return false;
|
||||
if ((1 != _nextPts.size().height) && (1 != _nextPts.size().width))
|
||||
return false;
|
||||
if (_nextPts.total() != npoints)
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
_nextPts.create(_prevPts.size(), _prevPts.type());
|
||||
}
|
||||
|
||||
PyrLKOpticalFlow opticalFlow;
|
||||
opticalFlow.winSize = winSize;
|
||||
opticalFlow.maxLevel = maxLevel;
|
||||
opticalFlow.iters = criteria.maxCount;
|
||||
opticalFlow.derivLambda = criteria.epsilon;
|
||||
opticalFlow.useInitialFlow = useInitialFlow;
|
||||
|
||||
if (!opticalFlow.checkParam())
|
||||
return false;
|
||||
|
||||
UMat umatErr;
|
||||
if (_err.needed())
|
||||
{
|
||||
_err.create((int)npoints, 1, CV_32FC1);
|
||||
umatErr = _err.getUMat();
|
||||
}
|
||||
else
|
||||
umatErr.create((int)npoints, 1, CV_32FC1);
|
||||
|
||||
_status.create((int)npoints, 1, CV_8UC1);
|
||||
UMat umatNextPts = _nextPts.getUMat();
|
||||
UMat umatStatus = _status.getUMat();
|
||||
return opticalFlow.sparse(_prevImg.getUMat(), _nextImg.getUMat(), _prevPts.getUMat(), umatNextPts, umatStatus, umatErr);
|
||||
}
|
||||
};
|
||||
|
||||
void cv::calcOpticalFlowPyrLK( InputArray _prevImg, InputArray _nextImg,
|
||||
InputArray _prevPts, InputOutputArray _nextPts,
|
||||
OutputArray _status, OutputArray _err,
|
||||
@ -597,6 +854,10 @@ void cv::calcOpticalFlowPyrLK( InputArray _prevImg, InputArray _nextImg,
|
||||
TermCriteria criteria,
|
||||
int flags, double minEigThreshold )
|
||||
{
|
||||
bool use_opencl = ocl::useOpenCL() && (_prevImg.isUMat() || _nextImg.isUMat());
|
||||
if ( use_opencl && ocl_calcOpticalFlowPyrLK(_prevImg, _nextImg, _prevPts, _nextPts, _status, _err, winSize, maxLevel, criteria, flags/*, minEigThreshold*/))
|
||||
return;
|
||||
|
||||
Mat prevPtsMat = _prevPts.getMat();
|
||||
const int derivDepth = DataType<cv::detail::deriv_type>::depth;
|
||||
|
||||
|
583
modules/video/src/opencl/pyrlk.cl
Normal file
583
modules/video/src/opencl/pyrlk.cl
Normal file
@ -0,0 +1,583 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Dachuan Zhao, dachuan@multicorewareinc.com
|
||||
// Yao Wang, bitwangyaoyao@gmail.com
|
||||
// Xiaopeng Fu, fuxiaopeng2222@163.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#define BUFFER 64
|
||||
#define BUFFER2 BUFFER>>1
|
||||
#ifndef WAVE_SIZE
|
||||
#define WAVE_SIZE 1
|
||||
#endif
|
||||
#ifdef CPU
|
||||
|
||||
inline void reduce3(float val1, float val2, float val3, __local float* smem1, __local float* smem2, __local float* smem3, int tid)
|
||||
{
|
||||
smem1[tid] = val1;
|
||||
smem2[tid] = val2;
|
||||
smem3[tid] = val3;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
for(int i = BUFFER2; i > 0; i >>= 1)
|
||||
{
|
||||
if(tid < i)
|
||||
{
|
||||
smem1[tid] += smem1[tid + i];
|
||||
smem2[tid] += smem2[tid + i];
|
||||
smem3[tid] += smem3[tid + i];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
}
|
||||
|
||||
inline void reduce2(float val1, float val2, volatile __local float* smem1, volatile __local float* smem2, int tid)
|
||||
{
|
||||
smem1[tid] = val1;
|
||||
smem2[tid] = val2;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
for(int i = BUFFER2; i > 0; i >>= 1)
|
||||
{
|
||||
if(tid < i)
|
||||
{
|
||||
smem1[tid] += smem1[tid + i];
|
||||
smem2[tid] += smem2[tid + i];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
}
|
||||
|
||||
inline void reduce1(float val1, volatile __local float* smem1, int tid)
|
||||
{
|
||||
smem1[tid] = val1;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
for(int i = BUFFER2; i > 0; i >>= 1)
|
||||
{
|
||||
if(tid < i)
|
||||
{
|
||||
smem1[tid] += smem1[tid + i];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
}
|
||||
#else
|
||||
inline void reduce3(float val1, float val2, float val3,
|
||||
__local volatile float* smem1, __local volatile float* smem2, __local volatile float* smem3, int tid)
|
||||
{
|
||||
smem1[tid] = val1;
|
||||
smem2[tid] = val2;
|
||||
smem3[tid] = val3;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 32)
|
||||
{
|
||||
smem1[tid] += smem1[tid + 32];
|
||||
smem2[tid] += smem2[tid + 32];
|
||||
smem3[tid] += smem3[tid + 32];
|
||||
#if WAVE_SIZE < 32
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid < 16)
|
||||
{
|
||||
#endif
|
||||
smem1[tid] += smem1[tid + 16];
|
||||
smem2[tid] += smem2[tid + 16];
|
||||
smem3[tid] += smem3[tid + 16];
|
||||
#if WAVE_SIZE <16
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid < 8)
|
||||
{
|
||||
#endif
|
||||
smem1[tid] += smem1[tid + 8];
|
||||
smem2[tid] += smem2[tid + 8];
|
||||
smem3[tid] += smem3[tid + 8];
|
||||
|
||||
smem1[tid] += smem1[tid + 4];
|
||||
smem2[tid] += smem2[tid + 4];
|
||||
smem3[tid] += smem3[tid + 4];
|
||||
|
||||
smem1[tid] += smem1[tid + 2];
|
||||
smem2[tid] += smem2[tid + 2];
|
||||
smem3[tid] += smem3[tid + 2];
|
||||
|
||||
smem1[tid] += smem1[tid + 1];
|
||||
smem2[tid] += smem2[tid + 1];
|
||||
smem3[tid] += smem3[tid + 1];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
inline void reduce2(float val1, float val2, __local volatile float* smem1, __local volatile float* smem2, int tid)
|
||||
{
|
||||
smem1[tid] = val1;
|
||||
smem2[tid] = val2;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 32)
|
||||
{
|
||||
smem1[tid] += smem1[tid + 32];
|
||||
smem2[tid] += smem2[tid + 32];
|
||||
#if WAVE_SIZE < 32
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid < 16)
|
||||
{
|
||||
#endif
|
||||
smem1[tid] += smem1[tid + 16];
|
||||
smem2[tid] += smem2[tid + 16];
|
||||
#if WAVE_SIZE <16
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid < 8)
|
||||
{
|
||||
#endif
|
||||
smem1[tid] += smem1[tid + 8];
|
||||
smem2[tid] += smem2[tid + 8];
|
||||
|
||||
smem1[tid] += smem1[tid + 4];
|
||||
smem2[tid] += smem2[tid + 4];
|
||||
|
||||
smem1[tid] += smem1[tid + 2];
|
||||
smem2[tid] += smem2[tid + 2];
|
||||
|
||||
smem1[tid] += smem1[tid + 1];
|
||||
smem2[tid] += smem2[tid + 1];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
inline void reduce1(float val1, __local volatile float* smem1, int tid)
|
||||
{
|
||||
smem1[tid] = val1;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 32)
|
||||
{
|
||||
smem1[tid] += smem1[tid + 32];
|
||||
#if WAVE_SIZE < 32
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid < 16)
|
||||
{
|
||||
#endif
|
||||
smem1[tid] += smem1[tid + 16];
|
||||
#if WAVE_SIZE <16
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid < 8)
|
||||
{
|
||||
#endif
|
||||
smem1[tid] += smem1[tid + 8];
|
||||
smem1[tid] += smem1[tid + 4];
|
||||
smem1[tid] += smem1[tid + 2];
|
||||
smem1[tid] += smem1[tid + 1];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
#endif
|
||||
|
||||
#define SCALE (1.0f / (1 << 20))
|
||||
#define THRESHOLD 0.01f
|
||||
|
||||
// Image read mode
|
||||
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;
|
||||
|
||||
inline void SetPatch(image2d_t I, float x, float y,
|
||||
float* Pch, float* Dx, float* Dy,
|
||||
float* A11, float* A12, float* A22)
|
||||
{
|
||||
*Pch = read_imagef(I, sampler, (float2)(x, y)).x;
|
||||
|
||||
float dIdx = 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x + 1, y)).x + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)).x -
|
||||
(3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x - 1, y)).x + 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1)).x);
|
||||
|
||||
float dIdy = 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x, y + 1)).x + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)).x -
|
||||
(3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x, y - 1)).x + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1)).x);
|
||||
|
||||
|
||||
*Dx = dIdx;
|
||||
*Dy = dIdy;
|
||||
|
||||
*A11 += dIdx * dIdx;
|
||||
*A12 += dIdx * dIdy;
|
||||
*A22 += dIdy * dIdy;
|
||||
}
|
||||
|
||||
inline void GetPatch(image2d_t J, float x, float y,
|
||||
float* Pch, float* Dx, float* Dy,
|
||||
float* b1, float* b2)
|
||||
{
|
||||
float J_val = read_imagef(J, sampler, (float2)(x, y)).x;
|
||||
float diff = (J_val - *Pch) * 32.0f;
|
||||
*b1 += diff**Dx;
|
||||
*b2 += diff**Dy;
|
||||
}
|
||||
|
||||
inline void GetError(image2d_t J, const float x, const float y, const float* Pch, float* errval)
|
||||
{
|
||||
float diff = read_imagef(J, sampler, (float2)(x,y)).x-*Pch;
|
||||
*errval += fabs(diff);
|
||||
}
|
||||
|
||||
inline void SetPatch4(image2d_t I, const float x, const float y,
|
||||
float4* Pch, float4* Dx, float4* Dy,
|
||||
float* A11, float* A12, float* A22)
|
||||
{
|
||||
*Pch = read_imagef(I, sampler, (float2)(x, y));
|
||||
|
||||
float4 dIdx = 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1)) + 10.0f * read_imagef(I, sampler, (float2)(x + 1, y)) + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)) -
|
||||
(3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)) + 10.0f * read_imagef(I, sampler, (float2)(x - 1, y)) + 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1)));
|
||||
|
||||
float4 dIdy = 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1)) + 10.0f * read_imagef(I, sampler, (float2)(x, y + 1)) + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)) -
|
||||
(3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)) + 10.0f * read_imagef(I, sampler, (float2)(x, y - 1)) + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1)));
|
||||
|
||||
|
||||
*Dx = dIdx;
|
||||
*Dy = dIdy;
|
||||
float4 sqIdx = dIdx * dIdx;
|
||||
*A11 += sqIdx.x + sqIdx.y + sqIdx.z;
|
||||
sqIdx = dIdx * dIdy;
|
||||
*A12 += sqIdx.x + sqIdx.y + sqIdx.z;
|
||||
sqIdx = dIdy * dIdy;
|
||||
*A22 += sqIdx.x + sqIdx.y + sqIdx.z;
|
||||
}
|
||||
|
||||
inline void GetPatch4(image2d_t J, const float x, const float y,
|
||||
const float4* Pch, const float4* Dx, const float4* Dy,
|
||||
float* b1, float* b2)
|
||||
{
|
||||
float4 J_val = read_imagef(J, sampler, (float2)(x, y));
|
||||
float4 diff = (J_val - *Pch) * 32.0f;
|
||||
float4 xdiff = diff* *Dx;
|
||||
*b1 += xdiff.x + xdiff.y + xdiff.z;
|
||||
xdiff = diff* *Dy;
|
||||
*b2 += xdiff.x + xdiff.y + xdiff.z;
|
||||
}
|
||||
|
||||
inline void GetError4(image2d_t J, const float x, const float y, const float4* Pch, float* errval)
|
||||
{
|
||||
float4 diff = read_imagef(J, sampler, (float2)(x,y))-*Pch;
|
||||
*errval += fabs(diff.x) + fabs(diff.y) + fabs(diff.z);
|
||||
}
|
||||
|
||||
#define GRIDSIZE 3
|
||||
__kernel void lkSparse(image2d_t I, image2d_t J,
|
||||
__global const float2* prevPts, int prevPtsStep, __global float2* nextPts, int nextPtsStep, __global uchar* status, __global float* err,
|
||||
const int level, const int rows, const int cols, int PATCH_X, int PATCH_Y, int c_winSize_x, int c_winSize_y, int c_iters, char calcErr)
|
||||
{
|
||||
__local float smem1[BUFFER];
|
||||
__local float smem2[BUFFER];
|
||||
__local float smem3[BUFFER];
|
||||
|
||||
unsigned int xid=get_local_id(0);
|
||||
unsigned int yid=get_local_id(1);
|
||||
unsigned int gid=get_group_id(0);
|
||||
unsigned int xsize=get_local_size(0);
|
||||
unsigned int ysize=get_local_size(1);
|
||||
int xBase, yBase, k;
|
||||
|
||||
float2 c_halfWin = (float2)((c_winSize_x - 1)>>1, (c_winSize_y - 1)>>1);
|
||||
|
||||
const int tid = mad24(yid, xsize, xid);
|
||||
|
||||
float2 prevPt = prevPts[gid] / (float2)(1 << level);
|
||||
|
||||
if (prevPt.x < 0 || prevPt.x >= cols || prevPt.y < 0 || prevPt.y >= rows)
|
||||
{
|
||||
if (tid == 0 && level == 0)
|
||||
{
|
||||
status[gid] = 0;
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
prevPt -= c_halfWin;
|
||||
|
||||
// extract the patch from the first image, compute covariation matrix of derivatives
|
||||
|
||||
float A11 = 0;
|
||||
float A12 = 0;
|
||||
float A22 = 0;
|
||||
|
||||
float I_patch[GRIDSIZE][GRIDSIZE];
|
||||
float dIdx_patch[GRIDSIZE][GRIDSIZE];
|
||||
float dIdy_patch[GRIDSIZE][GRIDSIZE];
|
||||
|
||||
yBase=yid;
|
||||
{
|
||||
xBase=xid;
|
||||
SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
|
||||
&I_patch[0][0], &dIdx_patch[0][0], &dIdy_patch[0][0],
|
||||
&A11, &A12, &A22);
|
||||
|
||||
|
||||
xBase+=xsize;
|
||||
SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
|
||||
&I_patch[0][1], &dIdx_patch[0][1], &dIdy_patch[0][1],
|
||||
&A11, &A12, &A22);
|
||||
|
||||
xBase+=xsize;
|
||||
if(xBase<c_winSize_x)
|
||||
SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
|
||||
&I_patch[0][2], &dIdx_patch[0][2], &dIdy_patch[0][2],
|
||||
&A11, &A12, &A22);
|
||||
}
|
||||
yBase+=ysize;
|
||||
{
|
||||
xBase=xid;
|
||||
SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
|
||||
&I_patch[1][0], &dIdx_patch[1][0], &dIdy_patch[1][0],
|
||||
&A11, &A12, &A22);
|
||||
|
||||
|
||||
xBase+=xsize;
|
||||
SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
|
||||
&I_patch[1][1], &dIdx_patch[1][1], &dIdy_patch[1][1],
|
||||
&A11, &A12, &A22);
|
||||
|
||||
xBase+=xsize;
|
||||
if(xBase<c_winSize_x)
|
||||
SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
|
||||
&I_patch[1][2], &dIdx_patch[1][2], &dIdy_patch[1][2],
|
||||
&A11, &A12, &A22);
|
||||
}
|
||||
yBase+=ysize;
|
||||
if(yBase<c_winSize_y)
|
||||
{
|
||||
xBase=xid;
|
||||
SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
|
||||
&I_patch[2][0], &dIdx_patch[2][0], &dIdy_patch[2][0],
|
||||
&A11, &A12, &A22);
|
||||
|
||||
|
||||
xBase+=xsize;
|
||||
SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
|
||||
&I_patch[2][1], &dIdx_patch[2][1], &dIdy_patch[2][1],
|
||||
&A11, &A12, &A22);
|
||||
|
||||
xBase+=xsize;
|
||||
if(xBase<c_winSize_x)
|
||||
SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
|
||||
&I_patch[2][2], &dIdx_patch[2][2], &dIdy_patch[2][2],
|
||||
&A11, &A12, &A22);
|
||||
}
|
||||
|
||||
reduce3(A11, A12, A22, smem1, smem2, smem3, tid);
|
||||
|
||||
A11 = smem1[0];
|
||||
A12 = smem2[0];
|
||||
A22 = smem3[0];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
float D = A11 * A22 - A12 * A12;
|
||||
|
||||
if (D < 1.192092896e-07f)
|
||||
{
|
||||
if (tid == 0 && level == 0)
|
||||
status[gid] = 0;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
A11 /= D;
|
||||
A12 /= D;
|
||||
A22 /= D;
|
||||
|
||||
prevPt = nextPts[gid] * 2.0f - c_halfWin;
|
||||
|
||||
for (k = 0; k < c_iters; ++k)
|
||||
{
|
||||
if (prevPt.x < -c_halfWin.x || prevPt.x >= cols || prevPt.y < -c_halfWin.y || prevPt.y >= rows)
|
||||
{
|
||||
if (tid == 0 && level == 0)
|
||||
status[gid] = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
float b1 = 0;
|
||||
float b2 = 0;
|
||||
|
||||
yBase=yid;
|
||||
{
|
||||
xBase=xid;
|
||||
GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
|
||||
&I_patch[0][0], &dIdx_patch[0][0], &dIdy_patch[0][0],
|
||||
&b1, &b2);
|
||||
|
||||
|
||||
xBase+=xsize;
|
||||
GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
|
||||
&I_patch[0][1], &dIdx_patch[0][1], &dIdy_patch[0][1],
|
||||
&b1, &b2);
|
||||
|
||||
xBase+=xsize;
|
||||
if(xBase<c_winSize_x)
|
||||
GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
|
||||
&I_patch[0][2], &dIdx_patch[0][2], &dIdy_patch[0][2],
|
||||
&b1, &b2);
|
||||
}
|
||||
yBase+=ysize;
|
||||
{
|
||||
xBase=xid;
|
||||
GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
|
||||
&I_patch[1][0], &dIdx_patch[1][0], &dIdy_patch[1][0],
|
||||
&b1, &b2);
|
||||
|
||||
|
||||
xBase+=xsize;
|
||||
GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
|
||||
&I_patch[1][1], &dIdx_patch[1][1], &dIdy_patch[1][1],
|
||||
&b1, &b2);
|
||||
|
||||
xBase+=xsize;
|
||||
if(xBase<c_winSize_x)
|
||||
GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
|
||||
&I_patch[1][2], &dIdx_patch[1][2], &dIdy_patch[1][2],
|
||||
&b1, &b2);
|
||||
}
|
||||
yBase+=ysize;
|
||||
if(yBase<c_winSize_y)
|
||||
{
|
||||
xBase=xid;
|
||||
GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
|
||||
&I_patch[2][0], &dIdx_patch[2][0], &dIdy_patch[2][0],
|
||||
&b1, &b2);
|
||||
|
||||
|
||||
xBase+=xsize;
|
||||
GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
|
||||
&I_patch[2][1], &dIdx_patch[2][1], &dIdy_patch[2][1],
|
||||
&b1, &b2);
|
||||
|
||||
xBase+=xsize;
|
||||
if(xBase<c_winSize_x)
|
||||
GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
|
||||
&I_patch[2][2], &dIdx_patch[2][2], &dIdy_patch[2][2],
|
||||
&b1, &b2);
|
||||
}
|
||||
|
||||
reduce2(b1, b2, smem1, smem2, tid);
|
||||
|
||||
b1 = smem1[0];
|
||||
b2 = smem2[0];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
float2 delta;
|
||||
delta.x = A12 * b2 - A22 * b1;
|
||||
delta.y = A12 * b1 - A11 * b2;
|
||||
|
||||
prevPt += delta;
|
||||
|
||||
if (fabs(delta.x) < THRESHOLD && fabs(delta.y) < THRESHOLD)
|
||||
break;
|
||||
}
|
||||
|
||||
D = 0.0f;
|
||||
if (calcErr)
|
||||
{
|
||||
yBase=yid;
|
||||
{
|
||||
xBase=xid;
|
||||
GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
|
||||
&I_patch[0][0], &D);
|
||||
|
||||
|
||||
xBase+=xsize;
|
||||
GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
|
||||
&I_patch[0][1], &D);
|
||||
|
||||
xBase+=xsize;
|
||||
if(xBase<c_winSize_x)
|
||||
GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
|
||||
&I_patch[0][2], &D);
|
||||
}
|
||||
yBase+=ysize;
|
||||
{
|
||||
xBase=xid;
|
||||
GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
|
||||
&I_patch[1][0], &D);
|
||||
|
||||
|
||||
xBase+=xsize;
|
||||
GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
|
||||
&I_patch[1][1], &D);
|
||||
|
||||
xBase+=xsize;
|
||||
if(xBase<c_winSize_x)
|
||||
GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
|
||||
&I_patch[1][2], &D);
|
||||
}
|
||||
yBase+=ysize;
|
||||
if(yBase<c_winSize_y)
|
||||
{
|
||||
xBase=xid;
|
||||
GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
|
||||
&I_patch[2][0], &D);
|
||||
|
||||
|
||||
xBase+=xsize;
|
||||
GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
|
||||
&I_patch[2][1], &D);
|
||||
|
||||
xBase+=xsize;
|
||||
if(xBase<c_winSize_x)
|
||||
GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
|
||||
&I_patch[2][2], &D);
|
||||
}
|
||||
|
||||
reduce1(D, smem1, tid);
|
||||
}
|
||||
|
||||
if (tid == 0)
|
||||
{
|
||||
prevPt += c_halfWin;
|
||||
|
||||
nextPts[gid] = prevPt;
|
||||
|
||||
if (calcErr)
|
||||
err[gid] = smem1[0] / (float)(c_winSize_x * c_winSize_y);
|
||||
}
|
||||
}
|
143
modules/video/test/ocl/test_optflowpyrlk.cpp
Normal file
143
modules/video/test/ocl/test_optflowpyrlk.cpp
Normal file
@ -0,0 +1,143 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
|
||||
#include "test_precomp.hpp"
|
||||
#include "opencv2/ts/ocl_test.hpp"
|
||||
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
||||
|
||||
namespace cvtest {
|
||||
namespace ocl {
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// PyrLKOpticalFlow
|
||||
|
||||
PARAM_TEST_CASE(PyrLKOpticalFlow, int, int)
|
||||
{
|
||||
Size winSize;
|
||||
int maxLevel;
|
||||
TermCriteria criteria;
|
||||
int flags;
|
||||
double minEigThreshold;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
winSize = Size(GET_PARAM(0), GET_PARAM(0));
|
||||
maxLevel = GET_PARAM(1);
|
||||
criteria = TermCriteria(TermCriteria::COUNT+TermCriteria::EPS, 30, 0.01);
|
||||
flags = 0;
|
||||
minEigThreshold = 1e-4f;
|
||||
}
|
||||
};
|
||||
|
||||
OCL_TEST_P(PyrLKOpticalFlow, Mat)
|
||||
{
|
||||
static const int npoints = 1000;
|
||||
static const float eps = 0.03f;
|
||||
|
||||
cv::Mat frame0 = readImage("optflow/RubberWhale1.png", cv::IMREAD_GRAYSCALE);
|
||||
ASSERT_FALSE(frame0.empty());
|
||||
UMat umatFrame0; frame0.copyTo(umatFrame0);
|
||||
|
||||
cv::Mat frame1 = readImage("optflow/RubberWhale2.png", cv::IMREAD_GRAYSCALE);
|
||||
ASSERT_FALSE(frame1.empty());
|
||||
UMat umatFrame1; frame1.copyTo(umatFrame1);
|
||||
|
||||
std::vector<cv::Point2f> pts;
|
||||
cv::goodFeaturesToTrack(frame0, pts, npoints, 0.01, 0.0);
|
||||
|
||||
std::vector<cv::Point2f> cpuNextPts;
|
||||
std::vector<unsigned char> cpuStatusCPU;
|
||||
std::vector<float> cpuErr;
|
||||
OCL_OFF(cv::calcOpticalFlowPyrLK(frame0, frame1, pts, cpuNextPts, cpuStatusCPU, cpuErr, winSize, maxLevel, criteria, flags, minEigThreshold));
|
||||
|
||||
UMat umatNextPts, umatStatus, umatErr;
|
||||
OCL_ON(cv::calcOpticalFlowPyrLK(umatFrame0, umatFrame1, pts, umatNextPts, umatStatus, umatErr, winSize, maxLevel, criteria, flags, minEigThreshold));
|
||||
std::vector<cv::Point2f> nextPts; umatNextPts.reshape(2, 1).copyTo(nextPts);
|
||||
std::vector<unsigned char> status; umatStatus.reshape(1, 1).copyTo(status);
|
||||
std::vector<float> err; umatErr.reshape(1, 1).copyTo(err);
|
||||
|
||||
ASSERT_EQ(cpuNextPts.size(), nextPts.size());
|
||||
ASSERT_EQ(cpuStatusCPU.size(), status.size());
|
||||
|
||||
size_t mistmatch = 0;
|
||||
for (size_t i = 0; i < nextPts.size(); ++i)
|
||||
{
|
||||
if (status[i] != cpuStatusCPU[i])
|
||||
{
|
||||
++mistmatch;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (status[i])
|
||||
{
|
||||
cv::Point2i a = nextPts[i];
|
||||
cv::Point2i b = cpuNextPts[i];
|
||||
|
||||
bool eq = std::abs(a.x - b.x) < 1 && std::abs(a.y - b.y) < 1;
|
||||
float errdiff = 0.0f;
|
||||
|
||||
if (!eq || errdiff > 1e-1)
|
||||
++mistmatch;
|
||||
}
|
||||
}
|
||||
|
||||
double bad_ratio = static_cast<double>(mistmatch) / (nextPts.size());
|
||||
|
||||
ASSERT_LE(bad_ratio, eps);
|
||||
}
|
||||
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Video, PyrLKOpticalFlow,
|
||||
Combine(
|
||||
Values(21, 25),
|
||||
Values(3, 5)
|
||||
)
|
||||
);
|
||||
|
||||
} } // namespace cvtest::ocl
|
||||
|
||||
|
||||
#endif // HAVE_OPENCL
|
Loading…
x
Reference in New Issue
Block a user