From a10fed8fd1125dd9cac4674d7090d0dd28401d6f Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 13 Feb 2012 12:57:27 +0000 Subject: [PATCH] added GoodFeaturesToTrackDetector_GPU and PyrLKOpticalFlow to gpu module --- modules/gpu/include/opencv2/gpu/gpu.hpp | 102 +++ modules/gpu/perf/perf_video.cpp | 86 ++- modules/gpu/src/cuda/gftt.cu | 146 +++++ modules/gpu/src/cuda/pyrlk.cu | 599 ++++++++++++++++++ modules/gpu/src/gftt.cpp | 165 +++++ modules/gpu/src/pyrlk.cpp | 295 +++++++++ modules/gpu/test/test_precomp.hpp | 1 + modules/gpu/test/test_video.cpp | 170 ++++- ...optical_flow.cpp => brox_optical_flow.cpp} | 2 +- samples/gpu/performance/tests.cpp | 74 +++ samples/gpu/pyrlk_optical_flow.cpp | 279 ++++++++ 11 files changed, 1913 insertions(+), 6 deletions(-) create mode 100644 modules/gpu/src/cuda/gftt.cu create mode 100644 modules/gpu/src/cuda/pyrlk.cu create mode 100644 modules/gpu/src/gftt.cpp create mode 100644 modules/gpu/src/pyrlk.cpp rename samples/gpu/{optical_flow.cpp => brox_optical_flow.cpp} (95%) create mode 100644 samples/gpu/pyrlk_optical_flow.cpp diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index 0f43b542f..f4ca508bd 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -1717,6 +1717,108 @@ public: GpuMat buf; }; +class CV_EXPORTS GoodFeaturesToTrackDetector_GPU +{ +public: + GoodFeaturesToTrackDetector_GPU(int maxCorners_, double qualityLevel_, double minDistance_) + { + maxCorners = maxCorners_; + qualityLevel = qualityLevel_; + minDistance = minDistance_; + + blockSize = 3; + useHarrisDetector = false; + harrisK = 0.04; + } + + //! return 1 rows matrix with CV_32FC2 type + void operator ()(const GpuMat& image, GpuMat& corners, const GpuMat& mask = GpuMat()); + + int maxCorners; + double qualityLevel; + double minDistance; + + int blockSize; + bool useHarrisDetector; + double harrisK; + + void releaseMemory() + { + Dx_.release(); + Dy_.release(); + buf_.release(); + eig_.release(); + minMaxbuf_.release(); + tmpCorners_.release(); + } + +private: + GpuMat Dx_; + GpuMat Dy_; + GpuMat buf_; + GpuMat eig_; + GpuMat minMaxbuf_; + GpuMat tmpCorners_; +}; + +class CV_EXPORTS PyrLKOpticalFlow +{ +public: + PyrLKOpticalFlow() + { + winSize = Size(21, 21); + maxLevel = 3; + iters = 30; + derivLambda = 0.5; + useInitialFlow = false; + minEigThreshold = 1e-4f; + } + + void sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts, + GpuMat& status, GpuMat* err = 0); + + void dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, GpuMat* err = 0); + + Size winSize; + int maxLevel; + int iters; + double derivLambda; + bool useInitialFlow; + float minEigThreshold; + + void releaseMemory() + { + dx_calcBuf_.release(); + dy_calcBuf_.release(); + + prevPyr_.clear(); + nextPyr_.clear(); + + dx_buf_.release(); + dy_buf_.release(); + + uPyr_.clear(); + vPyr_.clear(); + } + +private: + void calcSharrDeriv(const GpuMat& src, GpuMat& dx, GpuMat& dy); + + void buildImagePyramid(const GpuMat& img0, vector& pyr, bool withBorder); + + GpuMat dx_calcBuf_; + GpuMat dy_calcBuf_; + + vector prevPyr_; + vector nextPyr_; + + GpuMat dx_buf_; + GpuMat dy_buf_; + + vector uPyr_; + vector vPyr_; +}; + //! Interpolate frames (images) using provided optical flow (displacement field). //! frame0 - frame 0 (32-bit floating point images, single channel) //! frame1 - frame 1 (the same type and size) diff --git a/modules/gpu/perf/perf_video.cpp b/modules/gpu/perf/perf_video.cpp index 3cd73bea5..b349bc54e 100644 --- a/modules/gpu/perf/perf_video.cpp +++ b/modules/gpu/perf/perf_video.cpp @@ -98,21 +98,99 @@ GPU_PERF_TEST_1(CreateOpticalFlowNeedleMap, cv::gpu::DeviceInfo) cv::gpu::GpuMat frame0(frame0_host); cv::gpu::GpuMat frame1(frame1_host); - cv::gpu::GpuMat d_u, d_v; + cv::gpu::GpuMat u, v; cv::gpu::BroxOpticalFlow d_flow(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/, 10 /*inner_iterations*/, 77 /*outer_iterations*/, 10 /*solver_iterations*/); - d_flow(frame0, frame1, d_u, d_v); + d_flow(frame0, frame1, u, v); - cv::gpu::GpuMat d_vertex, d_colors; + cv::gpu::GpuMat vertex, colors; TEST_CYCLE() { - cv::gpu::createOpticalFlowNeedleMap(d_u, d_v, d_vertex, d_colors); + cv::gpu::createOpticalFlowNeedleMap(u, v, vertex, colors); } } INSTANTIATE_TEST_CASE_P(Video, CreateOpticalFlowNeedleMap, ALL_DEVICES); +////////////////////////////////////////////////////// +// GoodFeaturesToTrack + +GPU_PERF_TEST(GoodFeaturesToTrack, cv::gpu::DeviceInfo, double) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + double minDistance = GET_PARAM(1); + + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Mat image_host = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE); + + ASSERT_FALSE(image_host.empty()); + + cv::gpu::GoodFeaturesToTrackDetector_GPU detector(8000, 0.01, minDistance); + + cv::gpu::GpuMat image(image_host); + cv::gpu::GpuMat pts; + + TEST_CYCLE() + { + detector(image, pts); + } +} + +INSTANTIATE_TEST_CASE_P(Video, GoodFeaturesToTrack, testing::Combine(ALL_DEVICES, testing::Values(0.0, 3.0))); + +////////////////////////////////////////////////////// +// PyrLKOpticalFlowSparse + +GPU_PERF_TEST(PyrLKOpticalFlowSparse, cv::gpu::DeviceInfo, bool, int, int) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + bool useGray = GET_PARAM(1); + int points = GET_PARAM(2); + int win_size = GET_PARAM(3); + + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Mat frame0_host = readImage("gpu/opticalflow/frame0.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR); + cv::Mat frame1_host = readImage("gpu/opticalflow/frame1.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR); + + ASSERT_FALSE(frame0_host.empty()); + ASSERT_FALSE(frame1_host.empty()); + + cv::Mat gray_frame; + if (useGray) + gray_frame = frame0_host; + else + cv::cvtColor(frame0_host, gray_frame, cv::COLOR_BGR2GRAY); + + cv::gpu::GpuMat pts; + + cv::gpu::GoodFeaturesToTrackDetector_GPU detector(points, 0.01, 0.0); + detector(cv::gpu::GpuMat(gray_frame), pts); + + cv::gpu::PyrLKOpticalFlow pyrLK; + pyrLK.winSize = cv::Size(win_size, win_size); + + cv::gpu::GpuMat frame0(frame0_host); + cv::gpu::GpuMat frame1(frame1_host); + cv::gpu::GpuMat nextPts; + cv::gpu::GpuMat status; + + TEST_CYCLE() + { + pyrLK.sparse(frame0, frame1, pts, nextPts, status); + } +} + +INSTANTIATE_TEST_CASE_P(Video, PyrLKOpticalFlowSparse, testing::Combine + ( + ALL_DEVICES, + testing::Bool(), + testing::Values(1000, 2000, 4000, 8000), + testing::Values(17, 21) + )); + #endif diff --git a/modules/gpu/src/cuda/gftt.cu b/modules/gpu/src/cuda/gftt.cu new file mode 100644 index 000000000..0d1be7194 --- /dev/null +++ b/modules/gpu/src/cuda/gftt.cu @@ -0,0 +1,146 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +// Copyright (c) 2010, Paul Furgale, Chi Hay Tong +// +// The original code was written by Paul Furgale and Chi Hay Tong +// and later optimized and prepared for integration into OpenCV by Itseez. +// +//M*/ + +#include + +#include "opencv2/gpu/device/common.hpp" +#include "opencv2/gpu/device/utility.hpp" + +namespace cv { namespace gpu { namespace device +{ + namespace gfft + { + texture eigTex(0, cudaFilterModePoint, cudaAddressModeClamp); + + __device__ uint g_counter = 0; + + template __global__ void findCorners(float threshold, const Mask mask, float2* corners, uint max_count, int rows, int cols) + { + #if __CUDA_ARCH__ >= 110 + + const int j = blockIdx.x * blockDim.x + threadIdx.x; + const int i = blockIdx.y * blockDim.y + threadIdx.y; + + if (i > 0 && i < rows - 1 && j > 0 && j < cols - 1 && mask(i, j)) + { + float val = tex2D(eigTex, j, i); + + if (val > threshold) + { + float maxVal = val; + + maxVal = ::fmax(tex2D(eigTex, j - 1, i - 1), maxVal); + maxVal = ::fmax(tex2D(eigTex, j , i - 1), maxVal); + maxVal = ::fmax(tex2D(eigTex, j + 1, i - 1), maxVal); + + maxVal = ::fmax(tex2D(eigTex, j - 1, i), maxVal); + maxVal = ::fmax(tex2D(eigTex, j + 1, i), maxVal); + + maxVal = ::fmax(tex2D(eigTex, j - 1, i + 1), maxVal); + maxVal = ::fmax(tex2D(eigTex, j , i + 1), maxVal); + maxVal = ::fmax(tex2D(eigTex, j + 1, i + 1), maxVal); + + if (val == maxVal) + { + const uint ind = atomicInc(&g_counter, (uint)(-1)); + + if (ind < max_count) + corners[ind] = make_float2(j, i); + } + } + } + + #endif // __CUDA_ARCH__ >= 110 + } + + int findCorners_gpu(DevMem2Df eig, float threshold, DevMem2Db mask, float2* corners, int max_count) + { + void* counter_ptr; + cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) ); + + cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(uint)) ); + + bindTexture(&eigTex, eig); + + dim3 block(16, 16); + dim3 grid(divUp(eig.cols, block.x), divUp(eig.rows, block.y)); + + if (mask.data) + findCorners<<>>(threshold, SingleMask(mask), corners, max_count, eig.rows, eig.cols); + else + findCorners<<>>(threshold, WithOutMask(), corners, max_count, eig.rows, eig.cols); + + cudaSafeCall( cudaGetLastError() ); + + cudaSafeCall( cudaDeviceSynchronize() ); + + uint count; + cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(uint), cudaMemcpyDeviceToHost) ); + + return min(count, max_count); + } + + class EigGreater + { + public: + __device__ __forceinline__ bool operator()(float2 a, float2 b) const + { + return tex2D(eigTex, a.x, a.y) > tex2D(eigTex, b.x, b.y); + } + }; + + + void sortCorners_gpu(DevMem2Df eig, float2* corners, int count) + { + bindTexture(&eigTex, eig); + + thrust::device_ptr ptr(corners); + + thrust::sort(ptr, ptr + count, EigGreater()); + } + } // namespace optical_flow +}}} diff --git a/modules/gpu/src/cuda/pyrlk.cu b/modules/gpu/src/cuda/pyrlk.cu new file mode 100644 index 000000000..e40ade8ab --- /dev/null +++ b/modules/gpu/src/cuda/pyrlk.cu @@ -0,0 +1,599 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +// Copyright (c) 2010, Paul Furgale, Chi Hay Tong +// +// The original code was written by Paul Furgale and Chi Hay Tong +// and later optimized and prepared for integration into OpenCV by Itseez. +// +//M*/ + +#include "opencv2/gpu/device/common.hpp" +#include "opencv2/gpu/device/utility.hpp" +#include "opencv2/gpu/device/functional.hpp" +#include "opencv2/gpu/device/limits.hpp" + +namespace cv { namespace gpu { namespace device +{ + namespace pyrlk + { + __constant__ int c_cn; + __constant__ float c_minEigThreshold; + __constant__ int c_winSize_x; + __constant__ int c_winSize_y; + __constant__ int c_winSize_x_cn; + __constant__ int c_halfWin_x; + __constant__ int c_halfWin_y; + __constant__ int c_iters; + + void loadConstants(int cn, float minEigThreshold, int2 winSize, int iters) + { + int2 halfWin = make_int2((winSize.x - 1) / 2, (winSize.y - 1) / 2); + cudaSafeCall( cudaMemcpyToSymbol(c_cn, &cn, sizeof(int)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_minEigThreshold, &minEigThreshold, sizeof(float)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_winSize_x, &winSize.x, sizeof(int)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_winSize_y, &winSize.y, sizeof(int)) ); + winSize.x *= cn; + cudaSafeCall( cudaMemcpyToSymbol(c_winSize_x_cn, &winSize.x, sizeof(int)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_halfWin_x, &halfWin.x, sizeof(int)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_halfWin_y, &halfWin.y, sizeof(int)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_iters, &iters, sizeof(int)) ); + } + + __global__ void calcSharrDeriv_vertical(const PtrStepb src, PtrStep dx_buf, PtrStep dy_buf, int rows, int colsn) + { + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + + if (y < rows && x < colsn) + { + const uchar src_val0 = src(y > 0 ? y - 1 : 1, x); + const uchar src_val1 = src(y, x); + const uchar src_val2 = src(y < rows - 1 ? y + 1 : rows - 2, x); + + dx_buf(y, x) = (src_val0 + src_val2) * 3 + src_val1 * 10; + dy_buf(y, x) = src_val2 - src_val0; + } + } + + __global__ void calcSharrDeriv_horizontal(const PtrStep dx_buf, const PtrStep dy_buf, PtrStep dIdx, PtrStep dIdy, int rows, int cols) + { + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + + const int colsn = cols * c_cn; + + if (y < rows && x < colsn) + { + const short* dx_buf_row = dx_buf.ptr(y); + const short* dy_buf_row = dy_buf.ptr(y); + + const int xr = x + c_cn < colsn ? x + c_cn : (cols - 2) * c_cn + x + c_cn - colsn; + const int xl = x - c_cn >= 0 ? x - c_cn : c_cn + x; + + dIdx(y, x) = dx_buf_row[xr] - dx_buf_row[xl]; + dIdy(y, x) = (dy_buf_row[xr] + dy_buf_row[xl]) * 3 + dy_buf_row[x] * 10; + } + } + + void calcSharrDeriv_gpu(DevMem2Db src, DevMem2D_ dx_buf, DevMem2D_ dy_buf, DevMem2D_ dIdx, DevMem2D_ dIdy, int cn, + cudaStream_t stream) + { + dim3 block(32, 8); + dim3 grid(divUp(src.cols * cn, block.x), divUp(src.rows, block.y)); + + calcSharrDeriv_vertical<<>>(src, dx_buf, dy_buf, src.rows, src.cols * cn); + cudaSafeCall( cudaGetLastError() ); + + calcSharrDeriv_horizontal<<>>(dx_buf, dy_buf, dIdx, dIdy, src.rows, src.cols); + cudaSafeCall( cudaGetLastError() ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } + + #define W_BITS 14 + #define W_BITS1 14 + + #define CV_DESCALE(x, n) (((x) + (1 << ((n)-1))) >> (n)) + + __device__ int linearFilter(const PtrStepb& src, float2 pt, int x, int y) + { + int2 ipt; + ipt.x = __float2int_rd(pt.x); + ipt.y = __float2int_rd(pt.y); + + float a = pt.x - ipt.x; + float b = pt.y - ipt.y; + + int iw00 = __float2int_rn((1.0f - a) * (1.0f - b) * (1 << W_BITS)); + int iw01 = __float2int_rn(a * (1.0f - b) * (1 << W_BITS)); + int iw10 = __float2int_rn((1.0f - a) * b * (1 << W_BITS)); + int iw11 = (1 << W_BITS) - iw00 - iw01 - iw10; + + const uchar* src_row = src.ptr(ipt.y + y) + ipt.x * c_cn; + const uchar* src_row1 = src.ptr(ipt.y + y + 1) + ipt.x * c_cn; + + return CV_DESCALE(src_row[x] * iw00 + src_row[x + c_cn] * iw01 + src_row1[x] * iw10 + src_row1[x + c_cn] * iw11, W_BITS1 - 5); + } + + __device__ int linearFilter(const PtrStep& src, float2 pt, int x, int y) + { + int2 ipt; + ipt.x = __float2int_rd(pt.x); + ipt.y = __float2int_rd(pt.y); + + float a = pt.x - ipt.x; + float b = pt.y - ipt.y; + + int iw00 = __float2int_rn((1.0f - a) * (1.0f - b) * (1 << W_BITS)); + int iw01 = __float2int_rn(a * (1.0f - b) * (1 << W_BITS)); + int iw10 = __float2int_rn((1.0f - a) * b * (1 << W_BITS)); + int iw11 = (1 << W_BITS) - iw00 - iw01 - iw10; + + const short* src_row = src.ptr(ipt.y + y) + ipt.x * c_cn; + const short* src_row1 = src.ptr(ipt.y + y + 1) + ipt.x * c_cn; + + return CV_DESCALE(src_row[x] * iw00 + src_row[x + c_cn] * iw01 + src_row1[x] * iw10 + src_row1[x + c_cn] * iw11, W_BITS1); + } + + __device__ void reduce(float& val1, float& val2, float& val3, float* smem1, float* smem2, float* smem3, int tid) + { + smem1[tid] = val1; + smem2[tid] = val2; + smem3[tid] = val3; + __syncthreads(); + + if (tid < 128) + { + smem1[tid] = val1 += smem1[tid + 128]; + smem2[tid] = val2 += smem2[tid + 128]; + smem3[tid] = val3 += smem3[tid + 128]; + } + __syncthreads(); + + if (tid < 64) + { + smem1[tid] = val1 += smem1[tid + 64]; + smem2[tid] = val2 += smem2[tid + 64]; + smem3[tid] = val3 += smem3[tid + 64]; + } + __syncthreads(); + + if (tid < 32) + { + volatile float* vmem1 = smem1; + volatile float* vmem2 = smem2; + volatile float* vmem3 = smem3; + + vmem1[tid] = val1 += vmem1[tid + 32]; + vmem2[tid] = val2 += vmem2[tid + 32]; + vmem3[tid] = val3 += vmem3[tid + 32]; + + vmem1[tid] = val1 += vmem1[tid + 16]; + vmem2[tid] = val2 += vmem2[tid + 16]; + vmem3[tid] = val3 += vmem3[tid + 16]; + + vmem1[tid] = val1 += vmem1[tid + 8]; + vmem2[tid] = val2 += vmem2[tid + 8]; + vmem3[tid] = val3 += vmem3[tid + 8]; + + vmem1[tid] = val1 += vmem1[tid + 4]; + vmem2[tid] = val2 += vmem2[tid + 4]; + vmem3[tid] = val3 += vmem3[tid + 4]; + + vmem1[tid] = val1 += vmem1[tid + 2]; + vmem2[tid] = val2 += vmem2[tid + 2]; + vmem3[tid] = val3 += vmem3[tid + 2]; + + vmem1[tid] = val1 += vmem1[tid + 1]; + vmem2[tid] = val2 += vmem2[tid + 1]; + vmem3[tid] = val3 += vmem3[tid + 1]; + } + } + + __device__ void reduce(float& val1, float& val2, float* smem1, float* smem2, int tid) + { + smem1[tid] = val1; + smem2[tid] = val2; + __syncthreads(); + + if (tid < 128) + { + smem1[tid] = val1 += smem1[tid + 128]; + smem2[tid] = val2 += smem2[tid + 128]; + } + __syncthreads(); + + if (tid < 64) + { + smem1[tid] = val1 += smem1[tid + 64]; + smem2[tid] = val2 += smem2[tid + 64]; + } + __syncthreads(); + + if (tid < 32) + { + volatile float* vmem1 = smem1; + volatile float* vmem2 = smem2; + + vmem1[tid] = val1 += vmem1[tid + 32]; + vmem2[tid] = val2 += vmem2[tid + 32]; + + vmem1[tid] = val1 += vmem1[tid + 16]; + vmem2[tid] = val2 += vmem2[tid + 16]; + + vmem1[tid] = val1 += vmem1[tid + 8]; + vmem2[tid] = val2 += vmem2[tid + 8]; + + vmem1[tid] = val1 += vmem1[tid + 4]; + vmem2[tid] = val2 += vmem2[tid + 4]; + + vmem1[tid] = val1 += vmem1[tid + 2]; + vmem2[tid] = val2 += vmem2[tid + 2]; + + vmem1[tid] = val1 += vmem1[tid + 1]; + vmem2[tid] = val2 += vmem2[tid + 1]; + } + } + + #define SCALE (1.0f / (1 << 20)) + + template + __global__ void lkSparse(const PtrStepb I, const PtrStepb J, const PtrStep dIdx, const PtrStep dIdy, + const float2* prevPts, float2* nextPts, uchar* status, float* err, const int level, const int rows, const int cols) + { + __shared__ float smem1[256]; + __shared__ float smem2[256]; + __shared__ float smem3[256]; + + const int tid = threadIdx.y * blockDim.x + threadIdx.x; + + float2 prevPt = prevPts[blockIdx.x]; + prevPt.x *= (1.0f / (1 << level)); + prevPt.y *= (1.0f / (1 << level)); + + prevPt.x -= c_halfWin_x; + prevPt.y -= c_halfWin_y; + + if (prevPt.x < -c_winSize_x || prevPt.x >= cols || prevPt.y < -c_winSize_y || prevPt.y >= rows) + { + if (level == 0 && tid == 0) + { + status[blockIdx.x] = 0; + + if (calcErr) + err[blockIdx.x] = 0; + } + + return; + } + + // extract the patch from the first image, compute covariation matrix of derivatives + + float A11 = 0; + float A12 = 0; + float A22 = 0; + + int I_patch[PATCH_Y][PATCH_X]; + int dIdx_patch[PATCH_Y][PATCH_X]; + int dIdy_patch[PATCH_Y][PATCH_X]; + + for (int y = threadIdx.y, i = 0; y < c_winSize_y; y += blockDim.y, ++i) + { + for (int x = threadIdx.x, j = 0; x < c_winSize_x_cn; x += blockDim.x, ++j) + { + I_patch[i][j] = linearFilter(I, prevPt, x, y); + + int ixval = linearFilter(dIdx, prevPt, x, y); + int iyval = linearFilter(dIdy, prevPt, x, y); + + dIdx_patch[i][j] = ixval; + dIdy_patch[i][j] = iyval; + + A11 += ixval * ixval; + A12 += ixval * iyval; + A22 += iyval * iyval; + } + } + + reduce(A11, A12, A22, smem1, smem2, smem3, tid); + __syncthreads(); + + A11 = smem1[0]; + A12 = smem2[0]; + A22 = smem3[0]; + + A11 *= SCALE; + A12 *= SCALE; + A22 *= SCALE; + + { + float D = A11 * A22 - A12 * A12; + float minEig = (A22 + A11 - ::sqrtf((A11 - A22) * (A11 - A22) + 4.f * A12 * A12)) / (2 * c_winSize_x * c_winSize_y); + + if (calcErr && tid == 0) + err[blockIdx.x] = minEig; + + if (minEig < c_minEigThreshold || D < numeric_limits::epsilon()) + { + if (level == 0 && tid == 0) + status[blockIdx.x] = 0; + + return; + } + + D = 1.f / D; + + A11 *= D; + A12 *= D; + A22 *= D; + } + + float2 nextPt = nextPts[blockIdx.x]; + nextPt.x *= 2.f; + nextPt.y *= 2.f; + + nextPt.x -= c_halfWin_x; + nextPt.y -= c_halfWin_y; + + bool status_ = true; + + for (int k = 0; k < c_iters; ++k) + { + if (nextPt.x < -c_winSize_x || nextPt.x >= cols || nextPt.y < -c_winSize_y || nextPt.y >= rows) + { + status_ = false; + break; + } + + float b1 = 0; + float b2 = 0; + + for (int y = threadIdx.y, i = 0; y < c_winSize_y; y += blockDim.y, ++i) + { + for (int x = threadIdx.x, j = 0; x < c_winSize_x_cn; x += blockDim.x, ++j) + { + int diff = linearFilter(J, nextPt, x, y) - I_patch[i][j]; + + b1 += diff * dIdx_patch[i][j]; + b2 += diff * dIdy_patch[i][j]; + } + } + + reduce(b1, b2, smem1, smem2, tid); + __syncthreads(); + + b1 = smem1[0]; + b2 = smem2[0]; + + b1 *= SCALE; + b2 *= SCALE; + + float2 delta; + delta.x = A12 * b2 - A22 * b1; + delta.y = A12 * b1 - A11 * b2; + + nextPt.x += delta.x; + nextPt.y += delta.y; + + if (::fabs(delta.x) < 0.01f && ::fabs(delta.y) < 0.01f) + break; + } + + if (tid == 0) + { + nextPt.x += c_halfWin_x; + nextPt.y += c_halfWin_y; + + nextPts[blockIdx.x] = nextPt; + status[blockIdx.x] = status_; + } + } + + template + void lkSparse_caller(DevMem2Db I, DevMem2Db J, DevMem2D_ dIdx, DevMem2D_ dIdy, + const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount, + int level, dim3 block, cudaStream_t stream) + { + dim3 grid(ptcount); + + if (err) + { + cudaSafeCall( cudaFuncSetCacheConfig(lkSparse, cudaFuncCachePreferL1) ); + + lkSparse<<>>(I, J, dIdx, dIdy, + prevPts, nextPts, status, err, level, I.rows, I.cols); + } + else + { + cudaSafeCall( cudaFuncSetCacheConfig(lkSparse, cudaFuncCachePreferL1) ); + + lkSparse<<>>(I, J, dIdx, dIdy, + prevPts, nextPts, status, err, level, I.rows, I.cols); + } + + cudaSafeCall( cudaGetLastError() ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } + + void lkSparse_gpu(DevMem2Db I, DevMem2Db J, DevMem2D_ dIdx, DevMem2D_ dIdy, + const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount, + int level, dim3 block, dim3 patch, cudaStream_t stream) + { + typedef void (*func_t)(DevMem2Db I, DevMem2Db J, DevMem2D_ dIdx, DevMem2D_ dIdy, + const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount, + int level, dim3 block, cudaStream_t stream); + + static const func_t funcs[5][5] = + { + {lkSparse_caller<1, 1>, lkSparse_caller<2, 1>, lkSparse_caller<3, 1>, lkSparse_caller<4, 1>, lkSparse_caller<5, 1>}, + {lkSparse_caller<1, 2>, lkSparse_caller<2, 2>, lkSparse_caller<3, 2>, lkSparse_caller<4, 2>, lkSparse_caller<5, 2>}, + {lkSparse_caller<1, 3>, lkSparse_caller<2, 3>, lkSparse_caller<3, 3>, lkSparse_caller<4, 3>, lkSparse_caller<5, 3>}, + {lkSparse_caller<1, 4>, lkSparse_caller<2, 4>, lkSparse_caller<3, 4>, lkSparse_caller<4, 4>, lkSparse_caller<5, 4>}, + {lkSparse_caller<1, 5>, lkSparse_caller<2, 5>, lkSparse_caller<3, 5>, lkSparse_caller<4, 5>, lkSparse_caller<5, 5>} + }; + + funcs[patch.y - 1][patch.x - 1](I, J, dIdx, dIdy, + prevPts, nextPts, status, err, ptcount, + level, block, stream); + } + + template + __global__ void lkDense(const PtrStepb I, const PtrStepb J, const PtrStep dIdx, const PtrStep dIdy, + PtrStepf u, PtrStepf v, PtrStepf err, const int rows, const int cols) + { + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + + if (x >= cols || y >= rows) + return; + + // extract the patch from the first image, compute covariation matrix of derivatives + + float A11 = 0; + float A12 = 0; + float A22 = 0; + + for (int i = 0; i < c_winSize_y; ++i) + { + for (int j = 0; j < c_winSize_x; ++j) + { + int ixval = dIdx(y - c_halfWin_y + i, x - c_halfWin_x + j); + int iyval = dIdy(y - c_halfWin_y + i, x - c_halfWin_x + j); + + A11 += ixval * ixval; + A12 += ixval * iyval; + A22 += iyval * iyval; + } + } + + A11 *= SCALE; + A12 *= SCALE; + A22 *= SCALE; + + { + float D = A11 * A22 - A12 * A12; + float minEig = (A22 + A11 - ::sqrtf((A11 - A22) * (A11 - A22) + 4.f * A12 * A12)) / (2 * c_winSize_x * c_winSize_y); + + if (calcErr) + err(y, x) = minEig; + + if (minEig < c_minEigThreshold || D < numeric_limits::epsilon()) + return; + + D = 1.f / D; + + A11 *= D; + A12 *= D; + A22 *= D; + } + + float2 nextPt; + nextPt.x = x - c_halfWin_x + u(y, x); + nextPt.y = y - c_halfWin_y + v(y, x); + + for (int k = 0; k < c_iters; ++k) + { + if (nextPt.x < -c_winSize_x || nextPt.x >= cols || nextPt.y < -c_winSize_y || nextPt.y >= rows) + break; + + float b1 = 0; + float b2 = 0; + + for (int i = 0; i < c_winSize_y; ++i) + { + for (int j = 0; j < c_winSize_x; ++j) + { + int I_val = I(y - c_halfWin_y + i, x - c_halfWin_x + j); + + int diff = linearFilter(J, nextPt, j, i) - CV_DESCALE(I_val * (1 << W_BITS), W_BITS1 - 5); + + b1 += diff * dIdx(y - c_halfWin_y + i, x - c_halfWin_x + j); + b2 += diff * dIdy(y - c_halfWin_y + i, x - c_halfWin_x + j); + } + } + + b1 *= SCALE; + b2 *= SCALE; + + float2 delta; + delta.x = A12 * b2 - A22 * b1; + delta.y = A12 * b1 - A11 * b2; + + nextPt.x += delta.x; + nextPt.y += delta.y; + + if (::fabs(delta.x) < 0.01f && ::fabs(delta.y) < 0.01f) + break; + } + + u(y, x) = nextPt.x - x + c_halfWin_x; + v(y, x) = nextPt.y - y + c_halfWin_y; + } + + void lkDense_gpu(DevMem2Db I, DevMem2Db J, DevMem2D_ dIdx, DevMem2D_ dIdy, + DevMem2Df u, DevMem2Df v, DevMem2Df* err, cudaStream_t stream) + { + dim3 block(32, 8); + dim3 grid(divUp(I.cols, block.x), divUp(I.rows, block.y)); + + if (err) + { + cudaSafeCall( cudaFuncSetCacheConfig(lkDense, cudaFuncCachePreferL1) ); + + lkDense<<>>(I, J, dIdx, dIdy, u, v, *err, I.rows, I.cols); + cudaSafeCall( cudaGetLastError() ); + } + else + { + cudaSafeCall( cudaFuncSetCacheConfig(lkDense, cudaFuncCachePreferL1) ); + + lkDense<<>>(I, J, dIdx, dIdy, u, v, PtrStepf(), I.rows, I.cols); + cudaSafeCall( cudaGetLastError() ); + } + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } + } +}}} diff --git a/modules/gpu/src/gftt.cpp b/modules/gpu/src/gftt.cpp new file mode 100644 index 000000000..869c155c0 --- /dev/null +++ b/modules/gpu/src/gftt.cpp @@ -0,0 +1,165 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other GpuMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or bpied warranties, including, but not limited to, the bpied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +using namespace std; +using namespace cv; +using namespace cv::gpu; + +#if !defined (HAVE_CUDA) + +void cv::gpu::GoodFeaturesToTrackDetector_GPU::operator ()(const GpuMat&, GpuMat&, const GpuMat&) { throw_nogpu(); } + +#else /* !defined (HAVE_CUDA) */ + +namespace cv { namespace gpu { namespace device +{ + namespace gfft + { + int findCorners_gpu(DevMem2Df eig, float threshold, DevMem2Db mask, float2* corners, int max_count); + void sortCorners_gpu(DevMem2Df eig, float2* corners, int count); + } +}}} + +void cv::gpu::GoodFeaturesToTrackDetector_GPU::operator ()(const GpuMat& image, GpuMat& corners, const GpuMat& mask) +{ + using namespace cv::gpu::device::gfft; + + CV_Assert(qualityLevel > 0 && minDistance >= 0 && maxCorners >= 0); + CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size())); + CV_Assert(TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS)); + + ensureSizeIsEnough(image.size(), CV_32F, eig_); + + if (useHarrisDetector) + cornerHarris(image, eig_, Dx_, Dy_, buf_, blockSize, 3, harrisK); + else + cornerMinEigenVal(image, eig_, Dx_, Dy_, buf_, blockSize, 3); + + double maxVal = 0; + minMax(eig_, 0, &maxVal, GpuMat(), minMaxbuf_); + + ensureSizeIsEnough(1, std::max(1000, static_cast(image.size().area() * 0.05)), CV_32FC2, tmpCorners_); + + int total = findCorners_gpu(eig_, static_cast(maxVal * qualityLevel), mask, tmpCorners_.ptr(), tmpCorners_.cols); + + sortCorners_gpu(eig_, tmpCorners_.ptr(), total); + + if (minDistance < 1) + tmpCorners_.colRange(0, maxCorners > 0 ? std::min(maxCorners, total) : total).copyTo(corners); + else + { + vector tmp(total); + Mat tmpMat(1, total, CV_32FC2, (void*)&tmp[0]); + tmpCorners_.colRange(0, total).download(tmpMat); + + vector tmp2; + tmp2.reserve(total); + + const int cell_size = cvRound(minDistance); + const int grid_width = (image.cols + cell_size - 1) / cell_size; + const int grid_height = (image.rows + cell_size - 1) / cell_size; + + std::vector< std::vector > grid(grid_width * grid_height); + + for (int i = 0; i < total; ++i) + { + Point2f p = tmp[i]; + + bool good = true; + + int x_cell = static_cast(p.x / cell_size); + int y_cell = static_cast(p.y / cell_size); + + int x1 = x_cell - 1; + int y1 = y_cell - 1; + int x2 = x_cell + 1; + int y2 = y_cell + 1; + + // boundary check + x1 = std::max(0, x1); + y1 = std::max(0, y1); + x2 = std::min(grid_width - 1, x2); + y2 = std::min(grid_height - 1, y2); + + for (int yy = y1; yy <= y2; yy++) + { + for (int xx = x1; xx <= x2; xx++) + { + vector& m = grid[yy * grid_width + xx]; + + if (!m.empty()) + { + for(int j = 0; j < m.size(); j++) + { + float dx = p.x - m[j].x; + float dy = p.y - m[j].y; + + if (dx * dx + dy * dy < minDistance * minDistance) + { + good = false; + goto break_out; + } + } + } + } + } + + break_out: + + if(good) + { + grid[y_cell * grid_width + x_cell].push_back(p); + + tmp2.push_back(p); + + if (maxCorners > 0 && tmp2.size() == maxCorners) + break; + } + } + + corners.upload(Mat(1, tmp2.size(), CV_32FC2, &tmp2[0])); + } +} + +#endif /* !defined (HAVE_CUDA) */ diff --git a/modules/gpu/src/pyrlk.cpp b/modules/gpu/src/pyrlk.cpp new file mode 100644 index 000000000..eaafd104e --- /dev/null +++ b/modules/gpu/src/pyrlk.cpp @@ -0,0 +1,295 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other GpuMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or bpied warranties, including, but not limited to, the bpied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +using namespace std; +using namespace cv; +using namespace cv::gpu; + +#if !defined (HAVE_CUDA) + +void cv::gpu::PyrLKOpticalFlow::sparse(const GpuMat&, const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat*) { throw_nogpu(); } +void cv::gpu::PyrLKOpticalFlow::dense(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat*) { throw_nogpu(); } + +#else /* !defined (HAVE_CUDA) */ + +namespace cv { namespace gpu { namespace device +{ + namespace pyrlk + { + void loadConstants(int cn, float minEigThreshold, int2 winSize, int iters); + + void calcSharrDeriv_gpu(DevMem2Db src, DevMem2D_ dx_buf, DevMem2D_ dy_buf, DevMem2D_ dIdx, DevMem2D_ dIdy, int cn, + cudaStream_t stream = 0); + + void lkSparse_gpu(DevMem2Db I, DevMem2Db J, DevMem2D_ dIdx, DevMem2D_ dIdy, + const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount, + int level, dim3 block, dim3 patch, cudaStream_t stream = 0); + + void lkDense_gpu(DevMem2Db I, DevMem2Db J, DevMem2D_ dIdx, DevMem2D_ dIdy, + DevMem2Df u, DevMem2Df v, DevMem2Df* err, cudaStream_t stream = 0); + } +}}} + +void cv::gpu::PyrLKOpticalFlow::calcSharrDeriv(const GpuMat& src, GpuMat& dIdx, GpuMat& dIdy) +{ + using namespace cv::gpu::device::pyrlk; + + CV_Assert(src.rows > 1 && src.cols > 1); + CV_Assert(src.depth() == CV_8U); + + const int cn = src.channels(); + + ensureSizeIsEnough(src.size(), CV_MAKETYPE(CV_16S, cn), dx_calcBuf_); + ensureSizeIsEnough(src.size(), CV_MAKETYPE(CV_16S, cn), dy_calcBuf_); + + const int colsn = src.cols * cn; + + calcSharrDeriv_gpu(src, dx_calcBuf_, dy_calcBuf_, dIdx, dIdy, cn); +} + +void cv::gpu::PyrLKOpticalFlow::buildImagePyramid(const GpuMat& img0, vector& pyr, bool withBorder) +{ + pyr.resize(maxLevel + 1); + + Size sz = img0.size(); + + for (int level = 0; level <= maxLevel; ++level) + { + GpuMat temp; + + if (withBorder) + { + temp.create(sz.height + winSize.height * 2, sz.width + winSize.width * 2, img0.type()); + pyr[level] = temp(Rect(winSize.width, winSize.height, sz.width, sz.height)); + } + else + { + ensureSizeIsEnough(sz, img0.type(), pyr[level]); + } + + if (level == 0) + img0.copyTo(pyr[level]); + else + pyrDown(pyr[level - 1], pyr[level]); + + if (withBorder) + copyMakeBorder(pyr[level], temp, winSize.height, winSize.height, winSize.width, winSize.width, BORDER_REFLECT_101); + + sz = Size((sz.width + 1) / 2, (sz.height + 1) / 2); + + if (sz.width <= winSize.width || sz.height <= winSize.height) + { + maxLevel = level; + break; + } + } +} + +void cv::gpu::PyrLKOpticalFlow::sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts, GpuMat& status, GpuMat* err) +{ + using namespace cv::gpu::device::pyrlk; + + if (prevPts.empty()) + { + nextPts.release(); + status.release(); + if (err) err->release(); + return; + } + + derivLambda = std::min(std::max(derivLambda, 0.0), 1.0); + + iters = std::min(std::max(iters, 0), 100); + + const int cn = prevImg.channels(); + + dim3 block; + + if (winSize.width * cn > 32) + { + block.x = 32; + block.y = 8; + } + else + { + block.x = block.y = 16; + } + + dim3 patch((winSize.width * cn + block.x - 1) / block.x, (winSize.height + block.y - 1) / block.y); + + CV_Assert(derivLambda >= 0); + CV_Assert(maxLevel >= 0 && winSize.width > 2 && winSize.height > 2); + CV_Assert(prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type()); + CV_Assert(patch.x > 0 && patch.x < 6 && patch.y > 0 && patch.y < 6); + CV_Assert(prevPts.rows == 1 && prevPts.type() == CV_32FC2); + + if (useInitialFlow) + CV_Assert(nextPts.size() == prevPts.size() && nextPts.type() == CV_32FC2); + else + ensureSizeIsEnough(1, prevPts.cols, prevPts.type(), nextPts); + + GpuMat temp1 = (useInitialFlow ? nextPts : prevPts).reshape(1); + GpuMat temp2 = nextPts.reshape(1); + multiply(temp1, Scalar::all(1.0 / (1 << maxLevel) / 2.0), temp2); + + ensureSizeIsEnough(1, prevPts.cols, CV_8UC1, status); + status.setTo(Scalar::all(1)); + + if (err) + ensureSizeIsEnough(1, prevPts.cols, CV_32FC1, *err); + + // build the image pyramids. + // we pad each level with +/-winSize.{width|height} + // pixels to simplify the further patch extraction. + + buildImagePyramid(prevImg, prevPyr_, true); + buildImagePyramid(nextImg, nextPyr_, true); + + // dI/dx ~ Ix, dI/dy ~ Iy + + ensureSizeIsEnough(prevImg.rows + winSize.height * 2, prevImg.cols + winSize.width * 2, CV_MAKETYPE(CV_16S, cn), dx_buf_); + ensureSizeIsEnough(prevImg.rows + winSize.height * 2, prevImg.cols + winSize.width * 2, CV_MAKETYPE(CV_16S, cn), dy_buf_); + + loadConstants(cn, minEigThreshold, make_int2(winSize.width, winSize.height), iters); + + for (int level = maxLevel; level >= 0; level--) + { + Size imgSize = prevPyr_[level].size(); + + GpuMat dxWhole(imgSize.height + winSize.height * 2, imgSize.width + winSize.width * 2, dx_buf_.type(), dx_buf_.data, dx_buf_.step); + GpuMat dyWhole(imgSize.height + winSize.height * 2, imgSize.width + winSize.width * 2, dy_buf_.type(), dy_buf_.data, dy_buf_.step); + dxWhole.setTo(Scalar::all(0)); + dyWhole.setTo(Scalar::all(0)); + GpuMat dIdx = dxWhole(Rect(winSize.width, winSize.height, imgSize.width, imgSize.height)); + GpuMat dIdy = dyWhole(Rect(winSize.width, winSize.height, imgSize.width, imgSize.height)); + + calcSharrDeriv(prevPyr_[level], dIdx, dIdy); + + lkSparse_gpu(prevPyr_[level], nextPyr_[level], dIdx, dIdy, + prevPts.ptr(), nextPts.ptr(), status.ptr(), level == 0 && err ? err->ptr() : 0, prevPts.cols, + level, block, patch); + } +} + +void cv::gpu::PyrLKOpticalFlow::dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, GpuMat* err) +{ + using namespace cv::gpu::device::pyrlk; + + derivLambda = std::min(std::max(derivLambda, 0.0), 1.0); + + iters = std::min(std::max(iters, 0), 100); + + CV_Assert(prevImg.type() == CV_8UC1); + CV_Assert(prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type()); + CV_Assert(derivLambda >= 0); + CV_Assert(maxLevel >= 0 && winSize.width > 2 && winSize.height > 2); + + if (useInitialFlow) + { + CV_Assert(u.size() == prevImg.size() && u.type() == CV_32FC1); + CV_Assert(v.size() == prevImg.size() && v.type() == CV_32FC1); + } + else + { + u.create(prevImg.size(), CV_32FC1); + v.create(prevImg.size(), CV_32FC1); + + u.setTo(Scalar::all(0)); + v.setTo(Scalar::all(0)); + } + + if (err) + err->create(prevImg.size(), CV_32FC1); + + // build the image pyramids. + // we pad each level with +/-winSize.{width|height} + // pixels to simplify the further patch extraction. + + buildImagePyramid(prevImg, prevPyr_, true); + buildImagePyramid(nextImg, nextPyr_, true); + buildImagePyramid(u, uPyr_, false); + buildImagePyramid(v, vPyr_, false); + + // dI/dx ~ Ix, dI/dy ~ Iy + + ensureSizeIsEnough(prevImg.rows + winSize.height * 2, prevImg.cols + winSize.width * 2, CV_16SC1, dx_buf_); + ensureSizeIsEnough(prevImg.rows + winSize.height * 2, prevImg.cols + winSize.width * 2, CV_16SC1, dy_buf_); + + loadConstants(1, minEigThreshold, make_int2(winSize.width, winSize.height), iters); + + DevMem2Df derr = err ? *err : DevMem2Df(); + + for (int level = maxLevel; level >= 0; level--) + { + Size imgSize = prevPyr_[level].size(); + + GpuMat dxWhole(imgSize.height + winSize.height * 2, imgSize.width + winSize.width * 2, dx_buf_.type(), dx_buf_.data, dx_buf_.step); + GpuMat dyWhole(imgSize.height + winSize.height * 2, imgSize.width + winSize.width * 2, dy_buf_.type(), dy_buf_.data, dy_buf_.step); + dxWhole.setTo(Scalar::all(0)); + dyWhole.setTo(Scalar::all(0)); + GpuMat dIdx = dxWhole(Rect(winSize.width, winSize.height, imgSize.width, imgSize.height)); + GpuMat dIdy = dyWhole(Rect(winSize.width, winSize.height, imgSize.width, imgSize.height)); + + calcSharrDeriv(prevPyr_[level], dIdx, dIdy); + + lkDense_gpu(prevPyr_[level], nextPyr_[level], dIdx, dIdy, uPyr_[level], vPyr_[level], + level == 0 && err ? &derr : 0); + + if (level == 0) + { + uPyr_[0].copyTo(u); + vPyr_[0].copyTo(v); + } + else + { + pyrUp(uPyr_[level], uPyr_[level - 1]); + pyrUp(vPyr_[level], vPyr_[level - 1]); + + multiply(uPyr_[level - 1], Scalar::all(2), uPyr_[level - 1]); + multiply(vPyr_[level - 1], Scalar::all(2), vPyr_[level - 1]); + } + } +} + +#endif /* !defined (HAVE_CUDA) */ diff --git a/modules/gpu/test/test_precomp.hpp b/modules/gpu/test/test_precomp.hpp index f40fc7b3b..8d9cc43f5 100644 --- a/modules/gpu/test/test_precomp.hpp +++ b/modules/gpu/test/test_precomp.hpp @@ -55,6 +55,7 @@ #include "opencv2/highgui/highgui.hpp" #include "opencv2/calib3d/calib3d.hpp" #include "opencv2/imgproc/imgproc.hpp" +#include "opencv2/video/video.hpp" #include "opencv2/ts/ts.hpp" #include "opencv2/ts/ts_perf.hpp" #include "opencv2/gpu/gpu.hpp" diff --git a/modules/gpu/test/test_video.cpp b/modules/gpu/test/test_video.cpp index 2b3ff8067..b29036c2b 100644 --- a/modules/gpu/test/test_video.cpp +++ b/modules/gpu/test/test_video.cpp @@ -254,4 +254,172 @@ TEST_P(InterpolateFrames, Regression) INSTANTIATE_TEST_CASE_P(Video, InterpolateFrames, ALL_DEVICES); -#endif +///////////////////////////////////////////////////////////////////////////////////////////////// +// GoodFeaturesToTrack + +PARAM_TEST_CASE(GoodFeaturesToTrack, cv::gpu::DeviceInfo, double) +{ + cv::gpu::DeviceInfo devInfo; + + cv::Mat image; + + int maxCorners; + double qualityLevel; + double minDistance; + + std::vector pts_gold; + + virtual void SetUp() + { + devInfo = GET_PARAM(0); + minDistance = GET_PARAM(1); + + cv::gpu::setDevice(devInfo.deviceID()); + + image = readImage("opticalflow/frame0.png", cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(image.empty()); + + maxCorners = 1000; + qualityLevel= 0.01; + + cv::goodFeaturesToTrack(image, pts_gold, maxCorners, qualityLevel, minDistance); + } +}; + +TEST_P(GoodFeaturesToTrack, Accuracy) +{ + cv::gpu::GoodFeaturesToTrackDetector_GPU detector(maxCorners, qualityLevel, minDistance); + + cv::gpu::GpuMat d_pts; + + detector(loadMat(image), d_pts); + + std::vector pts(d_pts.cols); + cv::Mat pts_mat(1, d_pts.cols, CV_32FC2, (void*)&pts[0]); + d_pts.download(pts_mat); + + ASSERT_EQ(pts_gold.size(), pts.size()); + + size_t mistmatch = 0; + + for (size_t i = 0; i < pts.size(); ++i) + { + cv::Point2i a = pts_gold[i]; + cv::Point2i b = pts[i]; + + bool eq = std::abs(a.x - b.x) < 1 && std::abs(a.y - b.y) < 1; + + if (!eq) + ++mistmatch; + } + + double bad_ratio = static_cast(mistmatch) / pts.size(); + + ASSERT_LE(bad_ratio, 0.01); +} + +INSTANTIATE_TEST_CASE_P(Video, GoodFeaturesToTrack, Combine(ALL_DEVICES, Values(0.0, 3.0))); + +///////////////////////////////////////////////////////////////////////////////////////////////// +// PyrLKOpticalFlow + +PARAM_TEST_CASE(PyrLKOpticalFlowSparse, cv::gpu::DeviceInfo, bool) +{ + cv::gpu::DeviceInfo devInfo; + + cv::Mat frame0; + cv::Mat frame1; + + std::vector pts; + + std::vector nextPts_gold; + std::vector status_gold; + std::vector err_gold; + + virtual void SetUp() + { + devInfo = GET_PARAM(0); + bool useGray = GET_PARAM(1); + + cv::gpu::setDevice(devInfo.deviceID()); + + frame0 = readImage("opticalflow/frame0.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR); + ASSERT_FALSE(frame0.empty()); + + frame1 = readImage("opticalflow/frame1.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR); + ASSERT_FALSE(frame1.empty()); + + cv::Mat gray_frame; + if (useGray) + gray_frame = frame0; + else + cv::cvtColor(frame0, gray_frame, cv::COLOR_BGR2GRAY); + + cv::goodFeaturesToTrack(gray_frame, pts, 1000, 0.01, 0.0); + + cv::calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts_gold, status_gold, err_gold, cv::Size(21, 21), 3, + cv::TermCriteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 30, 0.01), 0.5, CV_LKFLOW_GET_MIN_EIGENVALS); + } +}; + +TEST_P(PyrLKOpticalFlowSparse, Accuracy) +{ + cv::gpu::PyrLKOpticalFlow d_pyrLK; + + cv::gpu::GpuMat d_pts; + cv::Mat pts_mat(1, pts.size(), CV_32FC2, (void*)&pts[0]); + d_pts.upload(pts_mat); + + cv::gpu::GpuMat d_nextPts; + cv::gpu::GpuMat d_status; + cv::gpu::GpuMat d_err; + + d_pyrLK.sparse(loadMat(frame0), loadMat(frame1), d_pts, d_nextPts, d_status, &d_err); + + std::vector nextPts(d_nextPts.cols); + cv::Mat nextPts_mat(1, d_nextPts.cols, CV_32FC2, (void*)&nextPts[0]); + d_nextPts.download(nextPts_mat); + + std::vector status(d_status.cols); + cv::Mat status_mat(1, d_status.cols, CV_8UC1, (void*)&status[0]); + d_status.download(status_mat); + + std::vector err(d_err.cols); + cv::Mat err_mat(1, d_err.cols, CV_32FC1, (void*)&err[0]); + d_err.download(err_mat); + + ASSERT_EQ(nextPts_gold.size(), nextPts.size()); + ASSERT_EQ(status_gold.size(), status.size()); + ASSERT_EQ(err_gold.size(), err.size()); + + size_t mistmatch = 0; + + for (size_t i = 0; i < nextPts.size(); ++i) + { + if (status[i] != status_gold[i]) + { + ++mistmatch; + continue; + } + + if (status[i]) + { + cv::Point2i a = nextPts[i]; + cv::Point2i b = nextPts_gold[i]; + + bool eq = std::abs(a.x - b.x) < 1 && std::abs(a.y - b.y) < 1; + float errdiff = std::abs(err[i] - err_gold[i]); + + if (!eq || errdiff > 1e-4) + ++mistmatch; + } + } + + double bad_ratio = static_cast(mistmatch) / nextPts.size(); + + ASSERT_LE(bad_ratio, 0.01); +} + +INSTANTIATE_TEST_CASE_P(Video, PyrLKOpticalFlowSparse, Combine(ALL_DEVICES, Bool())); + +#endif // HAVE_CUDA diff --git a/samples/gpu/optical_flow.cpp b/samples/gpu/brox_optical_flow.cpp similarity index 95% rename from samples/gpu/optical_flow.cpp rename to samples/gpu/brox_optical_flow.cpp index c3015286a..5b19fb58a 100644 --- a/samples/gpu/optical_flow.cpp +++ b/samples/gpu/brox_optical_flow.cpp @@ -40,7 +40,7 @@ int main(int argc, const char* argv[]) if (cmd.get("help")) { - cout << "Usage: optical_flow [options]" << endl; + cout << "Usage: brox_optical_flow [options]" << endl; cout << "Avaible options:" << endl; cmd.printParams(); return 0; diff --git a/samples/gpu/performance/tests.cpp b/samples/gpu/performance/tests.cpp index 1188b9532..57583c405 100644 --- a/samples/gpu/performance/tests.cpp +++ b/samples/gpu/performance/tests.cpp @@ -2,6 +2,7 @@ #include "opencv2/imgproc/imgproc.hpp" #include "opencv2/highgui/highgui.hpp" #include "opencv2/calib3d/calib3d.hpp" +#include "opencv2/video/video.hpp" #include "opencv2/gpu/gpu.hpp" #include "performance.h" @@ -1109,3 +1110,76 @@ TEST(gemm) GPU_OFF; } } + +TEST(GoodFeaturesToTrack) +{ + Mat src = imread(abspath("aloeL.jpg"), IMREAD_GRAYSCALE); + if (src.empty()) throw runtime_error("can't open aloeL.jpg"); + + vector pts; + + goodFeaturesToTrack(src, pts, 8000, 0.01, 0.0); + + CPU_ON; + goodFeaturesToTrack(src, pts, 8000, 0.01, 0.0); + CPU_OFF; + + gpu::GoodFeaturesToTrackDetector_GPU detector(8000, 0.01, 0.0); + + gpu::GpuMat d_src(src); + gpu::GpuMat d_pts; + + detector(d_src, d_pts); + + GPU_ON; + detector(d_src, d_pts); + GPU_OFF; +} + +TEST(PyrLKOpticalFlow) +{ + Mat frame0 = imread(abspath("rubberwhale1.png")); + if (frame0.empty()) throw runtime_error("can't open rubberwhale1.png"); + + Mat frame1 = imread(abspath("rubberwhale2.png")); + if (frame1.empty()) throw runtime_error("can't open rubberwhale2.png"); + + Mat gray_frame; + cvtColor(frame0, gray_frame, COLOR_BGR2GRAY); + + for (int points = 1000; points <= 8000; points *= 2) + { + SUBTEST << points; + + vector pts; + goodFeaturesToTrack(gray_frame, pts, points, 0.01, 0.0); + + vector nextPts; + vector status; + + calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, noArray()); + + CPU_ON; + calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, noArray()); + CPU_OFF; + + gpu::PyrLKOpticalFlow d_pyrLK; + + gpu::GpuMat d_frame0(frame0); + gpu::GpuMat d_frame1(frame1); + + gpu::GpuMat d_pts; + Mat pts_mat(1, pts.size(), CV_32FC2, (void*)&pts[0]); + d_pts.upload(pts_mat); + + gpu::GpuMat d_nextPts; + gpu::GpuMat d_status; + gpu::GpuMat d_err; + + d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status); + + GPU_ON; + d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status); + GPU_OFF; + } +} diff --git a/samples/gpu/pyrlk_optical_flow.cpp b/samples/gpu/pyrlk_optical_flow.cpp new file mode 100644 index 000000000..cbc1348e2 --- /dev/null +++ b/samples/gpu/pyrlk_optical_flow.cpp @@ -0,0 +1,279 @@ +#include +#include + +#include "cvconfig.h" +#include "opencv2/core/core.hpp" +#include "opencv2/core/opengl_interop.hpp" +#include "opencv2/imgproc/imgproc.hpp" +#include "opencv2/highgui/highgui.hpp" +#include "opencv2/video/video.hpp" +#include "opencv2/gpu/gpu.hpp" + +using namespace std; +using namespace cv; +using namespace cv::gpu; + +void download(const GpuMat& d_mat, vector& vec) +{ + vec.resize(d_mat.cols); + Mat mat(1, d_mat.cols, CV_32FC2, (void*)&vec[0]); + d_mat.download(mat); +} + +void download(const GpuMat& d_mat, vector& vec) +{ + vec.resize(d_mat.cols); + Mat mat(1, d_mat.cols, CV_8UC1, (void*)&vec[0]); + d_mat.download(mat); +} + +void drawArrows(Mat& frame, const vector& prevPts, const vector& nextPts, const vector& status, Scalar line_color = Scalar(0, 0, 255)) +{ + for (size_t i = 0; i < prevPts.size(); ++i) + { + if (status[i]) + { + int line_thickness = 1; + + Point p = prevPts[i]; + Point q = nextPts[i]; + + double angle = atan2((double) p.y - q.y, (double) p.x - q.x); + + double hypotenuse = sqrt( (double)(p.y - q.y)*(p.y - q.y) + (double)(p.x - q.x)*(p.x - q.x) ); + + if (hypotenuse < 1.0) + continue; + + // Here we lengthen the arrow by a factor of three. + q.x = (int) (p.x - 3 * hypotenuse * cos(angle)); + q.y = (int) (p.y - 3 * hypotenuse * sin(angle)); + + // Now we draw the main line of the arrow. + line(frame, p, q, line_color, line_thickness); + + // Now draw the tips of the arrow. I do some scaling so that the + // tips look proportional to the main line of the arrow. + + p.x = (int) (q.x + 9 * cos(angle + CV_PI / 4)); + p.y = (int) (q.y + 9 * sin(angle + CV_PI / 4)); + line(frame, p, q, line_color, line_thickness); + + p.x = (int) (q.x + 9 * cos(angle - CV_PI / 4)); + p.y = (int) (q.y + 9 * sin(angle - CV_PI / 4)); + line(frame, p, q, line_color, line_thickness); + } + } +} + +#ifdef HAVE_OPENGL + +struct DrawData +{ + GlTexture tex; + GlArrays arr; +}; + +void drawCallback(void* userdata) +{ + DrawData* data = static_cast(userdata); + + if (data->tex.empty() || data->arr.empty()) + return; + + static GlCamera camera; + static bool init_camera = true; + + if (init_camera) + { + camera.setOrthoProjection(0.0, 1.0, 1.0, 0.0, 0.0, 1.0); + camera.lookAt(Point3d(0.0, 0.0, 1.0), Point3d(0.0, 0.0, 0.0), Point3d(0.0, 1.0, 0.0)); + init_camera = false; + } + + camera.setupProjectionMatrix(); + camera.setupModelViewMatrix(); + + render(data->tex); + render(data->arr, RenderMode::TRIANGLES); +} + +#endif + +template inline T clamp (T x, T a, T b) +{ + return ((x) > (a) ? ((x) < (b) ? (x) : (b)) : (a)); +} + +template inline T mapValue(T x, T a, T b, T c, T d) +{ + x = clamp(x, a, b); + return c + (d - c) * (x - a) / (b - a); +} + +void getFlowField(const Mat& u, const Mat& v, Mat& flowField) +{ + float maxDisplacement = 1.0f; + + for (int i = 0; i < u.rows; ++i) + { + const float* ptr_u = u.ptr(i); + const float* ptr_v = v.ptr(i); + + for (int j = 0; j < u.cols; ++j) + { + float d = max(fabsf(ptr_u[j]), fabsf(ptr_v[j])); + + if (d > maxDisplacement) + maxDisplacement = d; + } + } + + flowField.create(u.size(), CV_8UC4); + + for (int i = 0; i < flowField.rows; ++i) + { + const float* ptr_u = u.ptr(i); + const float* ptr_v = v.ptr(i); + + + Vec4b* row = flowField.ptr(i); + + for (int j = 0; j < flowField.cols; ++j) + { + row[j][0] = 0; + row[j][1] = static_cast (mapValue (-ptr_v[j], -maxDisplacement, maxDisplacement, 0.0f, 255.0f)); + row[j][2] = static_cast (mapValue ( ptr_u[j], -maxDisplacement, maxDisplacement, 0.0f, 255.0f)); + row[j][3] = 255; + } + } +} + +int main(int argc, const char* argv[]) +{ + const char* keys = + "{ h | help | false | print help message }" + "{ l | left | | specify left image }" + "{ r | right | | specify right image }" + "{ g | gray | false | use grayscale sources [PyrLK Sparse] }" + "{ p | points | 4000 | specify points count [GoodFeatureToTrack] }"; + + CommandLineParser cmd(argc, argv, keys); + + if (cmd.get("help")) + { + cout << "Usage: pyrlk_optical_flow [options]" << endl; + cout << "Avaible options:" << endl; + cmd.printParams(); + return 0; + } + + string fname0 = cmd.get("left"); + string fname1 = cmd.get("right"); + + if (fname0.empty() || fname1.empty()) + { + cerr << "Missing input file names" << endl; + return -1; + } + + bool useGray = cmd.get("gray"); + int points = cmd.get("points"); + + Mat frame0 = imread(fname0); + Mat frame1 = imread(fname1); + + if (frame0.empty() || frame1.empty()) + { + cout << "Can't load input images" << endl; + return -1; + } + + namedWindow("PyrLK [Sparse]", WINDOW_NORMAL); + namedWindow("PyrLK [Dense] Flow Field", WINDOW_NORMAL); + + #ifdef HAVE_OPENGL + namedWindow("PyrLK [Dense]", WINDOW_OPENGL); + + setGlDevice(); + #endif + + cout << "Image size : " << frame0.cols << " x " << frame0.rows << endl; + cout << "Points count : " << points << endl; + + cout << endl; + + Mat frame0Gray; + cvtColor(frame0, frame0Gray, COLOR_BGR2GRAY); + Mat frame1Gray; + cvtColor(frame1, frame1Gray, COLOR_BGR2GRAY); + + // goodFeaturesToTrack + + GoodFeaturesToTrackDetector_GPU detector(points, 0.01, 0.0); + + GpuMat d_frame0Gray(frame0Gray); + GpuMat d_prevPts; + + detector(d_frame0Gray, d_prevPts); + + // Sparse + + PyrLKOpticalFlow d_pyrLK; + + GpuMat d_frame0(frame0); + GpuMat d_frame1(frame1); + GpuMat d_frame1Gray(frame1Gray); + GpuMat d_nextPts; + GpuMat d_status; + + d_pyrLK.sparse(useGray ? d_frame0Gray : d_frame0, useGray ? d_frame1Gray : d_frame1, d_prevPts, d_nextPts, d_status); + + // Draw arrows + + vector prevPts(d_prevPts.cols); + download(d_prevPts, prevPts); + + vector nextPts(d_nextPts.cols); + download(d_nextPts, nextPts); + + vector status(d_status.cols); + download(d_status, status); + + drawArrows(frame0, prevPts, nextPts, status, Scalar(255, 0, 0)); + + imshow("PyrLK [Sparse]", frame0); + + // Dense + + GpuMat d_u; + GpuMat d_v; + + d_pyrLK.dense(d_frame0Gray, d_frame1Gray, d_u, d_v); + + // Draw flow field + + Mat flowField; + getFlowField(Mat(d_u), Mat(d_v), flowField); + + imshow("PyrLK [Dense] Flow Field", flowField); + + #ifdef HAVE_OPENGL + setOpenGlContext("PyrLK [Dense]"); + + GpuMat d_vertex, d_colors; + createOpticalFlowNeedleMap(d_u, d_v, d_vertex, d_colors); + + DrawData drawData; + + drawData.tex.copyFrom(d_frame0Gray); + drawData.arr.setVertexArray(d_vertex); + drawData.arr.setColorArray(d_colors, false); + + setOpenGlDrawCallback("PyrLK [Dense]", drawCallback, &drawData); + #endif + + waitKey(); + + return 0; +}