GPU TVL1 Optical Flow optimization:

Do not calculate the sum of the error in every iteration; instead, the error is summed only every second iteration, or even less often if the previous error sum is still too far from the threshold.
2013-08-27 11:21:41 +04:00
parent 525b6eca2e
commit bff0fad6c3
3 changed files with 26 additions and 11 deletions
--- a/modules/gpu/src/cuda/tvl1flow.cu
+++ b/modules/gpu/src/cuda/tvl1flow.cu
@@ -211,7 +211,7 @@ namespace tvl1flow
                              const PtrStepf grad, const PtrStepf rho_c,
                              const PtrStepf p11, const PtrStepf p12, const PtrStepf p21, const PtrStepf p22,
                              PtrStepf u1, PtrStepf u2, PtrStepf error,
-                              const float l_t, const float theta)
+                              const float l_t, const float theta, const bool calcError)
    {
        const int x = blockIdx.x * blockDim.x + threadIdx.x;
        const int y = blockIdx.y * blockDim.y + threadIdx.y;
@@ -265,21 +265,24 @@ namespace tvl1flow
        u1(y, x) = u1NewVal;
        u2(y, x) = u2NewVal;

-        const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal);
-        const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);
-        error(y, x) = n1 + n2;
+        if (calcError)
+        {
+            const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal);
+            const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);
+            error(y, x) = n1 + n2;
+        }
    }

    void estimateU(PtrStepSzf I1wx, PtrStepSzf I1wy,
                   PtrStepSzf grad, PtrStepSzf rho_c,
                   PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22,
                   PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf error,
-                   float l_t, float theta)
+                   float l_t, float theta, bool calcError)
    {
        const dim3 block(32, 8);
        const dim3 grid(divUp(I1wx.cols, block.x), divUp(I1wx.rows, block.y));

-        estimateUKernel<<<grid, block>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, error, l_t, theta);
+        estimateUKernel<<<grid, block>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, error, l_t, theta, calcError);
        cudaSafeCall( cudaGetLastError() );

        cudaSafeCall( cudaDeviceSynchronize() );