Neon optimization of cv::scaleAdd (CV_32F)

2014-09-23 21:14:42 +04:00 · 2014-09-23 21:14:42 +04:00 · 5d018c090f
commit 5d018c090f
parent ecbec7235f
1 changed file with 10 additions and 0 deletions
--- a/modules/core/src/matmul.cpp
+++ b/modules/core/src/matmul.cpp
@@ -2102,6 +2102,16 @@ static void scaleAdd_32f(const float* src1, const float* src2, float* dst,
            }
    }
    else
 #elif CV_NEON
    if (true)
    {
        for ( ; i <= len - 4; i += 4)
        {
            float32x4_t v_src1 = vld1q_f32(src1 + i), v_src2 = vld1q_f32(src2 + i);
            vst1q_f32(dst + i, vaddq_f32(vmulq_n_f32(v_src1, alpha), v_src2));
        }
    }
    else
 #endif
    //vz why do we need unroll here?
    for( ; i <= len - 4; i += 4 )