diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index 2b01f94e3..1cace848c 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -407,6 +407,9 @@ namespace cv //! computes element-wise product of the two arrays (c = a * b) // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4 CV_EXPORTS void multiply(const oclMat &a, const oclMat &b, oclMat &c, double scale = 1); + //! multiplies matrix to a number (dst = scalar * src) + // supports CV_32FC1 only + CV_EXPORTS void multiply(double scalar, const oclMat &src, oclMat &dst); //! computes element-wise quotient of the two arrays (c = a / b) // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4 CV_EXPORTS void divide(const oclMat &a, const oclMat &b, oclMat &c, double scale = 1); @@ -1372,6 +1375,7 @@ namespace cv private: oclMat minSSD, leBuf, riBuf; }; + class CV_EXPORTS StereoBeliefPropagation { public: @@ -1402,6 +1406,7 @@ namespace cv std::vector datas; oclMat out; }; + class CV_EXPORTS StereoConstantSpaceBP { public: @@ -1440,6 +1445,94 @@ namespace cv oclMat temp; oclMat out; }; + + // Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method + // + // see reference: + // [1] C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow". + // [2] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation". + class CV_EXPORTS OpticalFlowDual_TVL1_OCL + { + public: + OpticalFlowDual_TVL1_OCL(); + + void operator ()(const oclMat& I0, const oclMat& I1, oclMat& flowx, oclMat& flowy); + + void collectGarbage(); + + /** + * Time step of the numerical scheme. + */ + double tau; + + /** + * Weight parameter for the data term, attachment parameter. + * This is the most relevant parameter, which determines the smoothness of the output. + * The smaller this parameter is, the smoother the solutions we obtain. + * It depends on the range of motions of the images, so its value should be adapted to each image sequence. + */ + double lambda; + + /** + * Weight parameter for (u - v)^2, tightness parameter. + * It serves as a link between the attachment and the regularization terms. + * In theory, it should have a small value in order to maintain both parts in correspondence. + * The method is stable for a large range of values of this parameter. + */ + double theta; + + /** + * Number of scales used to create the pyramid of images. + */ + int nscales; + + /** + * Number of warpings per scale. + * Represents the number of times that I1(x+u0) and grad( I1(x+u0) ) are computed per scale. + * This is a parameter that assures the stability of the method. + * It also affects the running time, so it is a compromise between speed and accuracy. + */ + int warps; + + /** + * Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time. + * A small value will yield more accurate solutions at the expense of a slower convergence. + */ + double epsilon; + + /** + * Stopping criterion iterations number used in the numerical scheme. + */ + int iterations; + + bool useInitialFlow; + + private: + void procOneScale(const oclMat& I0, const oclMat& I1, oclMat& u1, oclMat& u2); + + std::vector I0s; + std::vector I1s; + std::vector u1s; + std::vector u2s; + + oclMat I1x_buf; + oclMat I1y_buf; + + oclMat I1w_buf; + oclMat I1wx_buf; + oclMat I1wy_buf; + + oclMat grad_buf; + oclMat rho_c_buf; + + oclMat p11_buf; + oclMat p12_buf; + oclMat p21_buf; + oclMat p22_buf; + + oclMat diff_buf; + oclMat norm_buf; + }; } } #if defined _MSC_VER && _MSC_VER >= 1200 diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp index d679a9348..ed2515dc6 100644 --- a/modules/ocl/src/arithm.cpp +++ b/modules/ocl/src/arithm.cpp @@ -22,6 +22,7 @@ // Jiang Liyuan, jlyuan001.good@163.com // Rock Li, Rock.Li@amd.com // Zailong Wu, bullet@yeah.net +// Peng Xiao, pengxiao@outlook.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -286,6 +287,7 @@ void cv::ocl::multiply(const oclMat &src1, const oclMat &src2, oclMat &dst, doub else arithmetic_run(src1, src2, dst, "arithm_mul", &arithm_mul, (void *)(&scalar)); } + void cv::ocl::divide(const oclMat &src1, const oclMat &src2, oclMat &dst, double scalar) { @@ -468,6 +470,11 @@ void cv::ocl::subtract(const Scalar &src2, const oclMat &src1, oclMat &dst, cons const char **kernelString = mask.data ? &arithm_add_scalar_mask : &arithm_add_scalar; arithmetic_scalar( src1, src2, dst, mask, kernelName, kernelString, -1); } +void cv::ocl::multiply(double scalar, const oclMat &src, oclMat &dst) +{ + string kernelName = "arithm_muls"; + arithmetic_scalar_run( src, dst, kernelName, &arithm_mul, scalar); +} void cv::ocl::divide(double scalar, const oclMat &src, oclMat &dst) { if(!src.clCxt->supportsFeature(Context::CL_DOUBLE)) diff --git a/modules/ocl/src/opencl/tvl1flow.cl b/modules/ocl/src/opencl/tvl1flow.cl new file mode 100644 index 000000000..e0ff7307b --- /dev/null +++ b/modules/ocl/src/opencl/tvl1flow.cl @@ -0,0 +1,407 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Jin Ma jin@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +__kernel void centeredGradientKernel(__global const float* src, int src_col, int src_row, int src_step, +__global float* dx, __global float* dy, int dx_step) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if((x < src_col)&&(y < src_row)) + { + int src_x1 = (x + 1) < (src_col -1)? (x + 1) : (src_col - 1); + int src_x2 = (x - 1) > 0 ? (x -1) : 0; + + //if(src[y * src_step + src_x1] == src[y * src_step+ src_x2]) + //{ + // printf("y = %d\n", y); + // printf("src_x1 = %d\n", src_x1); + // printf("src_x2 = %d\n", src_x2); + //} + dx[y * dx_step+ x] = 0.5f * (src[y * src_step + src_x1] - src[y * src_step+ src_x2]); + + int src_y1 = (y+1) < (src_row - 1) ? (y + 1) : (src_row - 1); + int src_y2 = (y - 1) > 0 ? (y - 1) : 0; + dy[y * dx_step+ x] = 0.5f * (src[src_y1 * src_step + x] - src[src_y2 * src_step+ x]); + } + +} + +float bicubicCoeff(float x_) +{ + + float x = fabs(x_); + if (x <= 1.0f) + { + return x * x * (1.5f * x - 2.5f) + 1.0f; + } + else if (x < 2.0f) + { + return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f; + } + else + { + return 0.0f; + } + +} + +__kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_col, int I0_row, + image2d_t tex_I1, image2d_t tex_I1x, image2d_t tex_I1y, + __global const float* u1, int u1_step, + __global const float* u2, + __global float* I1w, + __global float* I1wx, /*int I1wx_step,*/ + __global float* I1wy, /*int I1wy_step,*/ + __global float* grad, /*int grad_step,*/ + __global float* rho, + int I1w_step, + int u2_step, + int u1_offset_x, + int u1_offset_y, + int u2_offset_x, + int u2_offset_y) +{ + const int x = get_global_id(0); + const int y = get_global_id(1); + + if(x < I0_col&&y < I0_row) + { + //const float u1Val = u1(y, x); + const float u1Val = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x]; + //const float u2Val = u2(y, x); + const float u2Val = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x]; + + const float wx = x + u1Val; + const float wy = y + u2Val; + + const int xmin = ceil(wx - 2.0f); + const int xmax = floor(wx + 2.0f); + + const int ymin = ceil(wy - 2.0f); + const int ymax = floor(wy + 2.0f); + + float sum = 0.0f; + float sumx = 0.0f; + float sumy = 0.0f; + float wsum = 0.0f; + sampler_t sampleri = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; + + for (int cy = ymin; cy <= ymax; ++cy) + { + for (int cx = xmin; cx <= xmax; ++cx) + { + const float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy); + + //sum += w * tex2D(tex_I1 , cx, cy); + int2 cood = (int2)(cx, cy); + sum += w * read_imagef(tex_I1, sampleri, cood).x; + //sumx += w * tex2D(tex_I1x, cx, cy); + sumx += w * read_imagef(tex_I1x, sampleri, cood).x; + //sumy += w * tex2D(tex_I1y, cx, cy); + sumy += w * read_imagef(tex_I1y, sampleri, cood).x; + + wsum += w; + } + } + + const float coeff = 1.0f / wsum; + + const float I1wVal = sum * coeff; + const float I1wxVal = sumx * coeff; + const float I1wyVal = sumy * coeff; + + I1w[y * I1w_step + x] = I1wVal; + I1wx[y * I1w_step + x] = I1wxVal; + I1wy[y * I1w_step + x] = I1wyVal; + + const float Ix2 = I1wxVal * I1wxVal; + const float Iy2 = I1wyVal * I1wyVal; + + // store the |Grad(I1)|^2 + grad[y * I1w_step + x] = Ix2 + Iy2; + + // compute the constant part of the rho function + const float I0Val = I0[y * I0_step + x]; + rho[y * I1w_step + x] = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val; + } + +} + +float readImage(__global const float *image, const int x, const int y, const int rows, const int cols, const int elemCntPerRow) +{ + int i0 = clamp(x, 0, cols - 1); + int j0 = clamp(y, 0, rows - 1); + int i1 = clamp(x + 1, 0, cols - 1); + int j1 = clamp(y + 1, 0, rows - 1); + + return image[j0 * elemCntPerRow + i0]; +} + +__kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step, int I0_col, int I0_row, + __global const float* tex_I1, __global const float* tex_I1x, __global const float* tex_I1y, + __global const float* u1, int u1_step, + __global const float* u2, + __global float* I1w, + __global float* I1wx, /*int I1wx_step,*/ + __global float* I1wy, /*int I1wy_step,*/ + __global float* grad, /*int grad_step,*/ + __global float* rho, + int I1w_step, + int u2_step, + int I1_step, + int I1x_step) +{ + const int x = get_global_id(0); + const int y = get_global_id(1); + + if(x < I0_col&&y < I0_row) + { + //const float u1Val = u1(y, x); + const float u1Val = u1[y * u1_step + x]; + //const float u2Val = u2(y, x); + const float u2Val = u2[y * u2_step + x]; + + const float wx = x + u1Val; + const float wy = y + u2Val; + + const int xmin = ceil(wx - 2.0f); + const int xmax = floor(wx + 2.0f); + + const int ymin = ceil(wy - 2.0f); + const int ymax = floor(wy + 2.0f); + + float sum = 0.0f; + float sumx = 0.0f; + float sumy = 0.0f; + float wsum = 0.0f; + + for (int cy = ymin; cy <= ymax; ++cy) + { + for (int cx = xmin; cx <= xmax; ++cx) + { + const float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy); + + int2 cood = (int2)(cx, cy); + sum += w * readImage(tex_I1, cood.x, cood.y, I0_col, I0_row, I1_step); + sumx += w * readImage(tex_I1x, cood.x, cood.y, I0_col, I0_row, I1x_step); + sumy += w * readImage(tex_I1y, cood.x, cood.y, I0_col, I0_row, I1x_step); + wsum += w; + } + } + + const float coeff = 1.0f / wsum; + + const float I1wVal = sum * coeff; + const float I1wxVal = sumx * coeff; + const float I1wyVal = sumy * coeff; + + I1w[y * I1w_step + x] = I1wVal; + I1wx[y * I1w_step + x] = I1wxVal; + I1wy[y * I1w_step + x] = I1wyVal; + + const float Ix2 = I1wxVal * I1wxVal; + const float Iy2 = I1wyVal * I1wyVal; + + // store the |Grad(I1)|^2 + grad[y * I1w_step + x] = Ix2 + Iy2; + + // compute the constant part of the rho function + const float I0Val = I0[y * I0_step + x]; + rho[y * I1w_step + x] = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val; + } + +} + + +__kernel void estimateDualVariablesKernel(__global const float* u1, int u1_col, int u1_row, int u1_step, + __global const float* u2, + __global float* p11, int p11_step, + __global float* p12, + __global float* p21, + __global float* p22, + const float taut, + int u2_step, + int u1_offset_x, + int u1_offset_y, + int u2_offset_x, + int u2_offset_y) +{ + + //const int x = blockIdx.x * blockDim.x + threadIdx.x; + //const int y = blockIdx.y * blockDim.y + threadIdx.y; + const int x = get_global_id(0); + const int y = get_global_id(1); + + if(x < u1_col && y < u1_row) + { + int src_x1 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1); + const float u1x = u1[(y + u1_offset_y) * u1_step + src_x1 + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x]; + + int src_y1 = (y + 1) < (u1_row - 1) ? (y + 1) : (u1_row - 1); + const float u1y = u1[(src_y1 + u1_offset_y) * u1_step + x + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x]; + + int src_x2 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1); + const float u2x = u2[(y + u2_offset_y) * u2_step + src_x2 + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x]; + + int src_y2 = (y + 1) < (u1_row - 1) ? (y + 1) : (u1_row - 1); + const float u2y = u2[(src_y2 + u2_offset_y) * u2_step + x + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x]; + + const float g1 = hypot(u1x, u1y); + const float g2 = hypot(u2x, u2y); + + const float ng1 = 1.0f + taut * g1; + const float ng2 = 1.0f + taut * g2; + + p11[y * p11_step + x] = (p11[y * p11_step + x] + taut * u1x) / ng1; + p12[y * p11_step + x] = (p12[y * p11_step + x] + taut * u1y) / ng1; + p21[y * p11_step + x] = (p21[y * p11_step + x] + taut * u2x) / ng2; + p22[y * p11_step + x] = (p22[y * p11_step + x] + taut * u2y) / ng2; + } + +} + +float divergence(__global const float* v1, __global const float* v2, int y, int x, int v1_step, int v2_step) +{ + + if (x > 0 && y > 0) + { + const float v1x = v1[y * v1_step + x] - v1[y * v1_step + x - 1]; + const float v2y = v2[y * v2_step + x] - v2[(y - 1) * v2_step + x]; + return v1x + v2y; + } + else + { + if (y > 0) + return v1[y * v1_step + 0] + v2[y * v2_step + 0] - v2[(y - 1) * v2_step + 0]; + else + { + if (x > 0) + return v1[0 * v1_step + x] - v1[0 * v1_step + x - 1] + v2[0 * v2_step + x]; + else + return v1[0 * v1_step + 0] + v2[0 * v2_step + 0]; + } + } + +} + +__kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx_row, int I1wx_step, + __global const float* I1wy, /*int I1wy_step,*/ + __global const float* grad, /*int grad_step,*/ + __global const float* rho_c, /*int rho_c_step,*/ + __global const float* p11, /*int p11_step,*/ + __global const float* p12, /*int p12_step,*/ + __global const float* p21, /*int p21_step,*/ + __global const float* p22, /*int p22_step,*/ + __global float* u1, int u1_step, + __global float* u2, + __global float* error, const float l_t, const float theta, int u2_step, + int u1_offset_x, + int u1_offset_y, + int u2_offset_x, + int u2_offset_y) +{ + + //const int x = blockIdx.x * blockDim.x + threadIdx.x; + //const int y = blockIdx.y * blockDim.y + threadIdx.y; + + int x = get_global_id(0); + int y = get_global_id(1); + + + if(x < I1wx_col && y < I1wx_row) + { + const float I1wxVal = I1wx[y * I1wx_step + x]; + const float I1wyVal = I1wy[y * I1wx_step + x]; + const float gradVal = grad[y * I1wx_step + x]; + const float u1OldVal = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x]; + const float u2OldVal = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x]; + + const float rho = rho_c[y * I1wx_step + x] + (I1wxVal * u1OldVal + I1wyVal * u2OldVal); + + // estimate the values of the variable (v1, v2) (thresholding operator TH) + + float d1 = 0.0f; + float d2 = 0.0f; + + if (rho < -l_t * gradVal) + { + d1 = l_t * I1wxVal; + d2 = l_t * I1wyVal; + } + else if (rho > l_t * gradVal) + { + d1 = -l_t * I1wxVal; + d2 = -l_t * I1wyVal; + } + else if (gradVal > 1.192092896e-07f) + { + const float fi = -rho / gradVal; + d1 = fi * I1wxVal; + d2 = fi * I1wyVal; + } + + const float v1 = u1OldVal + d1; + const float v2 = u2OldVal + d2; + + // compute the divergence of the dual variable (p1, p2) + + const float div_p1 = divergence(p11, p12, y, x, I1wx_step, I1wx_step); + const float div_p2 = divergence(p21, p22, y, x, I1wx_step, I1wx_step); + + // estimate the values of the optical flow (u1, u2) + + const float u1NewVal = v1 + theta * div_p1; + const float u2NewVal = v2 + theta * div_p2; + + u1[(y + u1_offset_y) * u1_step + x + u1_offset_x] = u1NewVal; + u2[(y + u2_offset_y) * u2_step + x + u2_offset_x] = u2NewVal; + + const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal); + const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal); + error[y * I1wx_step + x] = n1 + n2; + } + +} diff --git a/modules/ocl/src/tvl1flow.cpp b/modules/ocl/src/tvl1flow.cpp new file mode 100644 index 000000000..8182f41a1 --- /dev/null +++ b/modules/ocl/src/tvl1flow.cpp @@ -0,0 +1,475 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Jin Ma, jin@multicorewareinc.com +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + + +#include "precomp.hpp" +using namespace std; +using namespace cv; +using namespace cv::ocl; + +namespace cv +{ + namespace ocl + { + ///////////////////////////OpenCL kernel strings/////////////////////////// + extern const char* tvl1flow; + } +} + +cv::ocl::OpticalFlowDual_TVL1_OCL::OpticalFlowDual_TVL1_OCL() +{ + tau = 0.25; + lambda = 0.15; + theta = 0.3; + nscales = 5; + warps = 5; + epsilon = 0.01; + iterations = 300; + useInitialFlow = false; +} + +void cv::ocl::OpticalFlowDual_TVL1_OCL::operator()(const oclMat& I0, const oclMat& I1, oclMat& flowx, oclMat& flowy) +{ + CV_Assert( I0.type() == CV_8UC1 || I0.type() == CV_32FC1 ); + CV_Assert( I0.size() == I1.size() ); + CV_Assert( I0.type() == I1.type() ); + CV_Assert( !useInitialFlow || (flowx.size() == I0.size() && flowx.type() == CV_32FC1 && flowy.size() == flowx.size() && flowy.type() == flowx.type()) ); + CV_Assert( nscales > 0 ); + + // allocate memory for the pyramid structure + I0s.resize(nscales); + I1s.resize(nscales); + u1s.resize(nscales); + u2s.resize(nscales); + //I0s_step == I1s_step + I0.convertTo(I0s[0], CV_32F, I0.depth() == CV_8U ? 1.0 : 255.0); + I1.convertTo(I1s[0], CV_32F, I1.depth() == CV_8U ? 1.0 : 255.0); + + + if (!useInitialFlow) + { + flowx.create(I0.size(), CV_32FC1); + flowy.create(I0.size(), CV_32FC1); + } + //u1s_step != u2s_step + u1s[0] = flowx; + u2s[0] = flowy; + + I1x_buf.create(I0.size(), CV_32FC1); + I1y_buf.create(I0.size(), CV_32FC1); + + I1w_buf.create(I0.size(), CV_32FC1); + I1wx_buf.create(I0.size(), CV_32FC1); + I1wy_buf.create(I0.size(), CV_32FC1); + + grad_buf.create(I0.size(), CV_32FC1); + rho_c_buf.create(I0.size(), CV_32FC1); + + p11_buf.create(I0.size(), CV_32FC1); + p12_buf.create(I0.size(), CV_32FC1); + p21_buf.create(I0.size(), CV_32FC1); + p22_buf.create(I0.size(), CV_32FC1); + + diff_buf.create(I0.size(), CV_32FC1); + + // create the scales + for (int s = 1; s < nscales; ++s) + { + ocl::pyrDown(I0s[s - 1], I0s[s]); + ocl::pyrDown(I1s[s - 1], I1s[s]); + + if (I0s[s].cols < 16 || I0s[s].rows < 16) + { + nscales = s; + break; + } + + if (useInitialFlow) + { + ocl::pyrDown(u1s[s - 1], u1s[s]); + ocl::pyrDown(u2s[s - 1], u2s[s]); + + //ocl::multiply(u1s[s], Scalar::all(0.5), u1s[s]); + multiply(0.5, u1s[s], u1s[s]); + //ocl::multiply(u2s[s], Scalar::all(0.5), u2s[s]); + multiply(0.5, u1s[s], u2s[s]); + } + } + + // pyramidal structure for computing the optical flow + for (int s = nscales - 1; s >= 0; --s) + { + // compute the optical flow at the current scale + procOneScale(I0s[s], I1s[s], u1s[s], u2s[s]); + + // if this was the last scale, finish now + if (s == 0) + break; + + // otherwise, upsample the optical flow + + // zoom the optical flow for the next finer scale + ocl::resize(u1s[s], u1s[s - 1], I0s[s - 1].size()); + ocl::resize(u2s[s], u2s[s - 1], I0s[s - 1].size()); + + // scale the optical flow with the appropriate zoom factor + multiply(2, u1s[s - 1], u1s[s - 1]); + multiply(2, u2s[s - 1], u2s[s - 1]); + + } + +} + +namespace ocl_tvl1flow +{ + void centeredGradient(const oclMat &src, oclMat &dx, oclMat &dy); + + void warpBackward(const oclMat &I0, const oclMat &I1, oclMat &I1x, oclMat &I1y, + oclMat &u1, oclMat &u2, oclMat &I1w, oclMat &I1wx, oclMat &I1wy, + oclMat &grad, oclMat &rho); + + void estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad, + oclMat &rho_c, oclMat &p11, oclMat &p12, + oclMat &p21, oclMat &p22, oclMat &u1, + oclMat &u2, oclMat &error, float l_t, float theta); + + void estimateDualVariables(oclMat &u1, oclMat &u2, + oclMat &p11, oclMat &p12, oclMat &p21, oclMat &p22, float taut); +} + +void cv::ocl::OpticalFlowDual_TVL1_OCL::procOneScale(const oclMat &I0, const oclMat &I1, oclMat &u1, oclMat &u2) +{ + using namespace ocl_tvl1flow; + + const double scaledEpsilon = epsilon * epsilon * I0.size().area(); + + CV_DbgAssert( I1.size() == I0.size() ); + CV_DbgAssert( I1.type() == I0.type() ); + CV_DbgAssert( u1.empty() || u1.size() == I0.size() ); + CV_DbgAssert( u2.size() == u1.size() ); + + if (u1.empty()) + { + u1.create(I0.size(), CV_32FC1); + u1.setTo(Scalar::all(0)); + + u2.create(I0.size(), CV_32FC1); + u2.setTo(Scalar::all(0)); + } + + oclMat I1x = I1x_buf(Rect(0, 0, I0.cols, I0.rows)); + oclMat I1y = I1y_buf(Rect(0, 0, I0.cols, I0.rows)); + + centeredGradient(I1, I1x, I1y); + + oclMat I1w = I1w_buf(Rect(0, 0, I0.cols, I0.rows)); + oclMat I1wx = I1wx_buf(Rect(0, 0, I0.cols, I0.rows)); + oclMat I1wy = I1wy_buf(Rect(0, 0, I0.cols, I0.rows)); + + oclMat grad = grad_buf(Rect(0, 0, I0.cols, I0.rows)); + oclMat rho_c = rho_c_buf(Rect(0, 0, I0.cols, I0.rows)); + + oclMat p11 = p11_buf(Rect(0, 0, I0.cols, I0.rows)); + oclMat p12 = p12_buf(Rect(0, 0, I0.cols, I0.rows)); + oclMat p21 = p21_buf(Rect(0, 0, I0.cols, I0.rows)); + oclMat p22 = p22_buf(Rect(0, 0, I0.cols, I0.rows)); + p11.setTo(Scalar::all(0)); + p12.setTo(Scalar::all(0)); + p21.setTo(Scalar::all(0)); + p22.setTo(Scalar::all(0)); + + oclMat diff = diff_buf(Rect(0, 0, I0.cols, I0.rows)); + + const float l_t = static_cast(lambda * theta); + const float taut = static_cast(tau / theta); + + for (int warpings = 0; warpings < warps; ++warpings) + { + warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c); + + double error = numeric_limits::max(); + for (int n = 0; error > scaledEpsilon && n < iterations; ++n) + { + estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, + u1, u2, diff, l_t, static_cast(theta)); + + error = ocl::sum(diff)[0]; + + estimateDualVariables(u1, u2, p11, p12, p21, p22, taut); + + } + } + +} + +void cv::ocl::OpticalFlowDual_TVL1_OCL::collectGarbage() +{ + I0s.clear(); + I1s.clear(); + u1s.clear(); + u2s.clear(); + + I1x_buf.release(); + I1y_buf.release(); + + I1w_buf.release(); + I1wx_buf.release(); + I1wy_buf.release(); + + grad_buf.release(); + rho_c_buf.release(); + + p11_buf.release(); + p12_buf.release(); + p21_buf.release(); + p22_buf.release(); + + diff_buf.release(); + norm_buf.release(); +} + +void ocl_tvl1flow::centeredGradient(const oclMat &src, oclMat &dx, oclMat &dy) +{ + Context *clCxt = src.clCxt; + size_t localThreads[3] = {32, 8, 1}; + size_t globalThreads[3] = {src.cols, src.rows, 1}; + + int srcElementSize = src.elemSize(); + int src_step = src.step/srcElementSize; + + int dElememntSize = dx.elemSize(); + int dx_step = dx.step/dElememntSize; + + string kernelName = "centeredGradientKernel"; + vector< pair > args; + args.push_back( make_pair( sizeof(cl_mem), (void*)&src.data)); + args.push_back( make_pair( sizeof(cl_int), (void*)&src.cols)); + args.push_back( make_pair( sizeof(cl_int), (void*)&src.rows)); + args.push_back( make_pair( sizeof(cl_int), (void*)&src_step)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&dx.data)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&dy.data)); + args.push_back( make_pair( sizeof(cl_int), (void*)&dx_step)); + openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThreads, localThreads, args, -1, -1); + +} + +void ocl_tvl1flow::estimateDualVariables(oclMat &u1, oclMat &u2, oclMat &p11, oclMat &p12, oclMat &p21, oclMat &p22, float taut) +{ + Context *clCxt = u1.clCxt; + + size_t localThread[] = {32, 8, 1}; + size_t globalThread[] = + { + u1.cols, + u1.rows, + 1 + }; + + int u1_element_size = u1.elemSize(); + int u1_step = u1.step/u1_element_size; + + int u2_element_size = u2.elemSize(); + int u2_step = u2.step/u2_element_size; + + int p11_element_size = p11.elemSize(); + int p11_step = p11.step/p11_element_size; + + int u1_offset_y = u1.offset/u1.step; + int u1_offset_x = u1.offset%u1.step; + u1_offset_x = u1_offset_x/u1.elemSize(); + + int u2_offset_y = u2.offset/u2.step; + int u2_offset_x = u2.offset%u2.step; + u2_offset_x = u2_offset_x/u2.elemSize(); + + string kernelName = "estimateDualVariablesKernel"; + vector< pair > args; + args.push_back( make_pair( sizeof(cl_mem), (void*)&u1.data)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u1.cols)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u1.rows)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u1_step)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&u2.data)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&p11.data)); + args.push_back( make_pair( sizeof(cl_int), (void*)&p11_step)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&p12.data)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&p21.data)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&p22.data)); + args.push_back( make_pair( sizeof(cl_float), (void*)&taut)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u2_step)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_x)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_y)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_x)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_y)); + + openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThread, localThread, args, -1, -1); +} + +void ocl_tvl1flow::estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad, + oclMat &rho_c, oclMat &p11, oclMat &p12, + oclMat &p21, oclMat &p22, oclMat &u1, + oclMat &u2, oclMat &error, float l_t, float theta) +{ + Context* clCxt = I1wx.clCxt; + + size_t localThread[] = {32, 8, 1}; + size_t globalThread[] = + { + I1wx.cols, + I1wx.rows, + 1 + }; + + int I1wx_element_size = I1wx.elemSize(); + int I1wx_step = I1wx.step/I1wx_element_size; + + int u1_element_size = u1.elemSize(); + int u1_step = u1.step/u1_element_size; + + int u2_element_size = u2.elemSize(); + int u2_step = u2.step/u2_element_size; + + int u1_offset_y = u1.offset/u1.step; + int u1_offset_x = u1.offset%u1.step; + u1_offset_x = u1_offset_x/u1.elemSize(); + + int u2_offset_y = u2.offset/u2.step; + int u2_offset_x = u2.offset%u2.step; + u2_offset_x = u2_offset_x/u2.elemSize(); + + string kernelName = "estimateUKernel"; + vector< pair > args; + args.push_back( make_pair( sizeof(cl_mem), (void*)&I1wx.data)); + args.push_back( make_pair( sizeof(cl_int), (void*)&I1wx.cols)); + args.push_back( make_pair( sizeof(cl_int), (void*)&I1wx.rows)); + args.push_back( make_pair( sizeof(cl_int), (void*)&I1wx_step)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&I1wy.data)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&grad.data)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&rho_c.data)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&p11.data)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&p12.data)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&p21.data)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&p22.data)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&u1.data)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u1_step)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&u2.data)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&error.data)); + args.push_back( make_pair( sizeof(cl_float), (void*)&l_t)); + args.push_back( make_pair( sizeof(cl_float), (void*)&theta)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u2_step)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_x)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_y)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_x)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_y)); + + openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThread, localThread, args, -1, -1); +} + +void ocl_tvl1flow::warpBackward(const oclMat &I0, const oclMat &I1, oclMat &I1x, oclMat &I1y, oclMat &u1, oclMat &u2, oclMat &I1w, oclMat &I1wx, oclMat &I1wy, oclMat &grad, oclMat &rho) +{ + Context* clCxt = I0.clCxt; + const bool isImgSupported = support_image2d(clCxt); + + CV_Assert(isImgSupported); + + int u1ElementSize = u1.elemSize(); + int u1Step = u1.step/u1ElementSize; + + int u2ElementSize = u2.elemSize(); + int u2Step = u2.step/u2ElementSize; + + int I0ElementSize = I0.elemSize(); + int I0Step = I0.step/I0ElementSize; + + int I1w_element_size = I1w.elemSize(); + int I1w_step = I1w.step/I1w_element_size; + + int u1_offset_y = u1.offset/u1.step; + int u1_offset_x = u1.offset%u1.step; + u1_offset_x = u1_offset_x/u1.elemSize(); + + int u2_offset_y = u2.offset/u2.step; + int u2_offset_x = u2.offset%u2.step; + u2_offset_x = u2_offset_x/u2.elemSize(); + + size_t localThread[] = {32, 8, 1}; + size_t globalThread[] = + { + I0.cols, + I0.rows, + 1 + }; + + cl_mem I1_tex; + cl_mem I1x_tex; + cl_mem I1y_tex; + I1_tex = bindTexture(I1); + I1x_tex = bindTexture(I1x); + I1y_tex = bindTexture(I1y); + + string kernelName = "warpBackwardKernel"; + vector< pair > args; + args.push_back( make_pair( sizeof(cl_mem), (void*)&I0.data)); + args.push_back( make_pair( sizeof(cl_int), (void*)&I0Step)); + args.push_back( make_pair( sizeof(cl_int), (void*)&I0.cols)); + args.push_back( make_pair( sizeof(cl_int), (void*)&I0.rows)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&I1_tex)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&I1x_tex)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&I1y_tex)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&u1.data)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u1Step)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&u2.data)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&I1w.data)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&I1wx.data)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&I1wy.data)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&grad.data)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&rho.data)); + args.push_back( make_pair( sizeof(cl_int), (void*)&I1w_step)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u2Step)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_x)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_y)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_x)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_y)); + + openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThread, localThread, args, -1, -1); +} \ No newline at end of file diff --git a/modules/ocl/test/test_pyrlk.cpp b/modules/ocl/test/test_optflow.cpp similarity index 69% rename from modules/ocl/test/test_pyrlk.cpp rename to modules/ocl/test/test_optflow.cpp index 064cb30bd..b08d33a08 100644 --- a/modules/ocl/test/test_pyrlk.cpp +++ b/modules/ocl/test/test_optflow.cpp @@ -1,4 +1,4 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // @@ -7,12 +7,16 @@ // copy or use the software. // // -// Intel License Agreement +// License Agreement // For Open Source Computer Vision Library -// -// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. // +// @Authors +// +// // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // @@ -21,9 +25,9 @@ // // * Redistribution's in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. +// and/or other oclMaterials provided with the distribution. // -// * The name of Intel Corporation may not be used to endorse or promote products +// * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // // This software is provided by the copyright holders and contributors "as is" and @@ -51,6 +55,47 @@ using namespace testing; using namespace std; extern string workdir; +////////////////////////////////////////////////////////////////////////// +PARAM_TEST_CASE(TVL1, bool) +{ + bool useRoi; + + virtual void SetUp() + { + useRoi = GET_PARAM(0); + } + +}; + +TEST_P(TVL1, Accuracy) +{ + cv::Mat frame0 = readImage(workdir + "../gpu/rubberwhale1.png", cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(frame0.empty()); + + cv::Mat frame1 = readImage(workdir + "../gpu/rubberwhale2.png", cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(frame1.empty()); + + cv::ocl::OpticalFlowDual_TVL1_OCL d_alg; + cv::RNG &rng = TS::ptr()->get_rng(); + cv::Mat flowx = randomMat(rng, frame0.size(), CV_32FC1, 0, 0, useRoi); + cv::Mat flowy = randomMat(rng, frame0.size(), CV_32FC1, 0, 0, useRoi); + cv::ocl::oclMat d_flowx(flowx), d_flowy(flowy); + d_alg(oclMat(frame0), oclMat(frame1), d_flowx, d_flowy); + + cv::Ptr alg = cv::createOptFlow_DualTVL1(); + cv::Mat flow; + alg->calc(frame0, frame1, flow); + cv::Mat gold[2]; + cv::split(flow, gold); + + EXPECT_MAT_SIMILAR(gold[0], d_flowx, 3e-3); + EXPECT_MAT_SIMILAR(gold[1], d_flowy, 3e-3); +} +INSTANTIATE_TEST_CASE_P(OCL_Video, TVL1, Values(true, false)); + + +///////////////////////////////////////////////////////////////////////////////////////////////// +// PyrLKOpticalFlow PARAM_TEST_CASE(Sparse, bool, bool) { @@ -60,7 +105,7 @@ PARAM_TEST_CASE(Sparse, bool, bool) virtual void SetUp() { UseSmart = GET_PARAM(0); - useGray = GET_PARAM(0); + useGray = GET_PARAM(1); } }; @@ -147,9 +192,9 @@ TEST_P(Sparse, Mat) } -INSTANTIATE_TEST_CASE_P(Video, Sparse, Combine( - Values(false, true), - Values(false))); +INSTANTIATE_TEST_CASE_P(OCL_Video, Sparse, Combine( + Values(false, true), + Values(false, true))); #endif // HAVE_OPENCL