Merge pull request #1345 from pengx17:2.4_tvl1ocl_opt
This commit is contained in:
		| @@ -341,7 +341,8 @@ __kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx | |||||||
|     int u1_offset_x, |     int u1_offset_x, | ||||||
|     int u1_offset_y, |     int u1_offset_y, | ||||||
|     int u2_offset_x, |     int u2_offset_x, | ||||||
|     int u2_offset_y) |     int u2_offset_y, | ||||||
|  |     char calc_error) | ||||||
| { | { | ||||||
|  |  | ||||||
|     //const int x = blockIdx.x * blockDim.x + threadIdx.x; |     //const int x = blockIdx.x * blockDim.x + threadIdx.x; | ||||||
| @@ -399,9 +400,12 @@ __kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx | |||||||
|         u1[(y + u1_offset_y) * u1_step + x + u1_offset_x] = u1NewVal; |         u1[(y + u1_offset_y) * u1_step + x + u1_offset_x] = u1NewVal; | ||||||
|         u2[(y + u2_offset_y) * u2_step + x + u2_offset_x] = u2NewVal; |         u2[(y + u2_offset_y) * u2_step + x + u2_offset_x] = u2NewVal; | ||||||
|  |  | ||||||
|  |         if(calc_error) | ||||||
|  |         { | ||||||
|             const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal); |             const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal); | ||||||
|             const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal); |             const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal); | ||||||
|             error[y * I1wx_step + x] = n1 + n2; |             error[y * I1wx_step + x] = n1 + n2; | ||||||
|         } |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
| } | } | ||||||
|   | |||||||
| @@ -172,7 +172,7 @@ namespace ocl_tvl1flow | |||||||
|     void estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad, |     void estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad, | ||||||
|         oclMat &rho_c, oclMat &p11, oclMat &p12, |         oclMat &rho_c, oclMat &p11, oclMat &p12, | ||||||
|         oclMat &p21, oclMat &p22, oclMat &u1, |         oclMat &p21, oclMat &p22, oclMat &u1, | ||||||
|         oclMat &u2, oclMat &error, float l_t, float theta); |         oclMat &u2, oclMat &error, float l_t, float theta, char calc_error); | ||||||
|  |  | ||||||
|     void estimateDualVariables(oclMat &u1, oclMat &u2, |     void estimateDualVariables(oclMat &u1, oclMat &u2, | ||||||
|         oclMat &p11, oclMat &p12, oclMat &p21, oclMat &p22, float taut); |         oclMat &p11, oclMat &p12, oclMat &p21, oclMat &p22, float taut); | ||||||
| @@ -229,18 +229,29 @@ void cv::ocl::OpticalFlowDual_TVL1_OCL::procOneScale(const oclMat &I0, const ocl | |||||||
|         warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c); |         warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c); | ||||||
|  |  | ||||||
|         double error = numeric_limits<double>::max(); |         double error = numeric_limits<double>::max(); | ||||||
|  |         double prev_error = 0; | ||||||
|         for (int n = 0; error > scaledEpsilon && n < iterations; ++n) |         for (int n = 0; error > scaledEpsilon && n < iterations; ++n) | ||||||
|         { |         { | ||||||
|  |             // some tweaks to make sum operation less frequently | ||||||
|  |             char calc_error = (n & 0x1) && (prev_error < scaledEpsilon); | ||||||
|             estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, |             estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, | ||||||
|                 u1, u2, diff, l_t, static_cast<float>(theta)); |                       u1, u2, diff, l_t, static_cast<float>(theta), calc_error); | ||||||
|  |             if(calc_error) | ||||||
|  |             { | ||||||
|                 error = ocl::sum(diff)[0]; |                 error = ocl::sum(diff)[0]; | ||||||
|  |                 prev_error = error; | ||||||
|  |             } | ||||||
|  |             else | ||||||
|  |             { | ||||||
|  |                 error = numeric_limits<double>::max(); | ||||||
|  |                 prev_error -= scaledEpsilon; | ||||||
|  |             } | ||||||
|             estimateDualVariables(u1, u2, p11, p12, p21, p22, taut); |             estimateDualVariables(u1, u2, p11, p12, p21, p22, taut); | ||||||
|  |  | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |  | ||||||
| } | } | ||||||
|  |  | ||||||
| void cv::ocl::OpticalFlowDual_TVL1_OCL::collectGarbage() | void cv::ocl::OpticalFlowDual_TVL1_OCL::collectGarbage() | ||||||
| @@ -348,7 +359,7 @@ void ocl_tvl1flow::estimateDualVariables(oclMat &u1, oclMat &u2, oclMat &p11, oc | |||||||
| void ocl_tvl1flow::estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad, | void ocl_tvl1flow::estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad, | ||||||
|     oclMat &rho_c, oclMat &p11, oclMat &p12, |     oclMat &rho_c, oclMat &p11, oclMat &p12, | ||||||
|     oclMat &p21, oclMat &p22, oclMat &u1, |     oclMat &p21, oclMat &p22, oclMat &u1, | ||||||
|     oclMat &u2, oclMat &error, float l_t, float theta) |     oclMat &u2, oclMat &error, float l_t, float theta, char calc_error) | ||||||
| { | { | ||||||
|     Context* clCxt = I1wx.clCxt; |     Context* clCxt = I1wx.clCxt; | ||||||
|  |  | ||||||
| @@ -401,6 +412,7 @@ void ocl_tvl1flow::estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad, | |||||||
|     args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_y)); |     args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_y)); | ||||||
|     args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_x)); |     args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_x)); | ||||||
|     args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_y)); |     args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_y)); | ||||||
|  |     args.push_back( make_pair( sizeof(cl_char), (void*)&calc_error)); | ||||||
|  |  | ||||||
|     openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThread, localThread, args, -1, -1); |     openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThread, localThread, args, -1, -1); | ||||||
| } | } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Roman Donchenko
					Roman Donchenko