From 168d1dc18ce8e1722cb7efcae0ed2912d0834ee2 Mon Sep 17 00:00:00 2001 From: peng xiao Date: Tue, 27 Aug 2013 11:42:54 +0800 Subject: [PATCH] Make some tweaks to make `sum` operations less frequently --- modules/ocl/src/opencl/tvl1flow.cl | 12 ++++++++---- modules/ocl/src/tvl1flow.cpp | 24 ++++++++++++++++++------ 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/modules/ocl/src/opencl/tvl1flow.cl b/modules/ocl/src/opencl/tvl1flow.cl index 4f8249602..095b339f8 100644 --- a/modules/ocl/src/opencl/tvl1flow.cl +++ b/modules/ocl/src/opencl/tvl1flow.cl @@ -341,7 +341,8 @@ __kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx int u1_offset_x, int u1_offset_y, int u2_offset_x, - int u2_offset_y) + int u2_offset_y, + char calc_error) { //const int x = blockIdx.x * blockDim.x + threadIdx.x; @@ -399,9 +400,12 @@ __kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx u1[(y + u1_offset_y) * u1_step + x + u1_offset_x] = u1NewVal; u2[(y + u2_offset_y) * u2_step + x + u2_offset_x] = u2NewVal; - const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal); - const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal); - error[y * I1wx_step + x] = n1 + n2; + if(calc_error) + { + const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal); + const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal); + error[y * I1wx_step + x] = n1 + n2; + } } } diff --git a/modules/ocl/src/tvl1flow.cpp b/modules/ocl/src/tvl1flow.cpp index 34e7fc613..5d0eca359 100644 --- a/modules/ocl/src/tvl1flow.cpp +++ b/modules/ocl/src/tvl1flow.cpp @@ -229,18 +229,29 @@ void cv::ocl::OpticalFlowDual_TVL1_OCL::procOneScale(const oclMat &I0, const ocl warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c); double error = numeric_limits::max(); + double prev_error = 0; for (int n = 0; error > scaledEpsilon && n < iterations; ++n) { + // some tweaks to make sum operation less frequently + char calc_error = (n & 0x1) && (prev_error < scaledEpsilon); estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, - u1, u2, diff, l_t, static_cast(theta)); - - error = ocl::sum(diff)[0]; - + u1, u2, diff, l_t, static_cast(theta), calc_error); + if(calc_error) + { + error = ocl::sum(diff)[0]; + prev_error = error; + } + else + { + error = numeric_limits::max(); + prev_error -= scaledEpsilon; + } estimateDualVariables(u1, u2, p11, p12, p21, p22, taut); } } + } void cv::ocl::OpticalFlowDual_TVL1_OCL::collectGarbage() @@ -348,7 +359,7 @@ void ocl_tvl1flow::estimateDualVariables(oclMat &u1, oclMat &u2, oclMat &p11, oc void ocl_tvl1flow::estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad, oclMat &rho_c, oclMat &p11, oclMat &p12, oclMat &p21, oclMat &p22, oclMat &u1, - oclMat &u2, oclMat &error, float l_t, float theta) + oclMat &u2, oclMat &error, float l_t, float theta, char calc_error) { Context* clCxt = I1wx.clCxt; @@ -401,7 +412,8 @@ void ocl_tvl1flow::estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad, args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_y)); args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_x)); args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_y)); - + args.push_back( make_pair( sizeof(cl_char), (void*)&calc_error)); + openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThread, localThread, args, -1, -1); }