fully implemented SSE and NEON cases of intrin.hpp; extended the HAL with some basic math functions

2015-04-16 23:00:26 +03:00
parent a2bba1b9e6
commit ee11a2d266
18 changed files with 2460 additions and 2003 deletions
--- a/modules/core/include/opencv2/core/base.hpp
+++ b/modules/core/include/opencv2/core/base.hpp
@@ -53,6 +53,7 @@

 #include "opencv2/core/cvdef.h"
 #include "opencv2/core/cvstd.hpp"
+#include "opencv2/hal.hpp"

 namespace cv
 {
@@ -419,6 +420,12 @@ typedef Hamming HammingLUT;

 /////////////////////////////////// inline norms ////////////////////////////////////

+template<typename _Tp> inline _Tp cv_abs(_Tp x) { return std::abs(x); } // generic fallback: defers to std::abs, result keeps type _Tp
+inline int cv_abs(uchar x) { return x; } // returns x unchanged, converted to int; no std::abs call (presumably uchar is unsigned - confirm typedef)
+inline int cv_abs(schar x) { return std::abs(x); } // result is returned as int, not schar
+inline int cv_abs(ushort x) { return x; } // returns x unchanged, converted to int; no std::abs call (presumably ushort is unsigned - confirm typedef)
+inline int cv_abs(short x) { return std::abs(x); } // result is returned as int, not short
+
 template<typename _Tp, typename _AccTp> static inline
 _AccTp normL2Sqr(const _Tp* a, int n)
 {
@@ -447,12 +454,12 @@ _AccTp normL1(const _Tp* a, int n)
 #if CV_ENABLE_UNROLLED
    for(; i <= n - 4; i += 4 )
    {
-        s += (_AccTp)std::abs(a[i]) + (_AccTp)std::abs(a[i+1]) +
-            (_AccTp)std::abs(a[i+2]) + (_AccTp)std::abs(a[i+3]);
+        s += (_AccTp)cv_abs(a[i]) + (_AccTp)cv_abs(a[i+1]) +
+            (_AccTp)cv_abs(a[i+2]) + (_AccTp)cv_abs(a[i+3]);
    }
 #endif
    for( ; i < n; i++ )
-        s += std::abs(a[i]);
+        s += cv_abs(a[i]);
    return s;
 }

@@ -461,7 +468,7 @@ _AccTp normInf(const _Tp* a, int n)
 {
    _AccTp s = 0;
    for( int i = 0; i < n; i++ )
-        s = std::max(s, (_AccTp)std::abs(a[i]));
+        s = std::max(s, (_AccTp)cv_abs(a[i]));
    return s;
 }

@@ -485,11 +492,10 @@ _AccTp normL2Sqr(const _Tp* a, const _Tp* b, int n)
    return s;
 }

-template<> inline
-float normL2Sqr(const float* a, const float* b, int n)
+inline float normL2Sqr(const float* a, const float* b, int n)
 {
    if( n >= 8 )
-        return normL2Sqr_(a, b, n);
+        return hal::normL2Sqr_(a, b, n);
    float s = 0;
    for( int i = 0; i < n; i++ )
    {
@@ -519,11 +525,10 @@ _AccTp normL1(const _Tp* a, const _Tp* b, int n)
    return s;
 }

-template<> inline
-float normL1(const float* a, const float* b, int n)
+inline float normL1(const float* a, const float* b, int n)
 {
    if( n >= 8 )
-        return normL1_(a, b, n);
+        return hal::normL1_(a, b, n);
    float s = 0;
    for( int i = 0; i < n; i++ )
    {
@@ -533,10 +538,9 @@ float normL1(const float* a, const float* b, int n)
    return s;
 }

-template<> inline
-int normL1(const uchar* a, const uchar* b, int n)
+inline int normL1(const uchar* a, const uchar* b, int n)
 {
-    return normL1_(a, b, n);
+    return hal::normL1_(a, b, n);
 }

 template<typename _Tp, typename _AccTp> static inline
@@ -551,6 +555,23 @@ _AccTp normInf(const _Tp* a, const _Tp* b, int n)
    return s;
 }

+/** @brief Computes the cube root of an argument.
+
+ The function cubeRoot computes \f$\sqrt[3]{\texttt{val}}\f$. Negative arguments are handled correctly.
+ NaN and Inf are not handled. The accuracy approaches the maximum possible accuracy for
+ single-precision data.
+ @param val A function argument.
+ */
+CV_EXPORTS_W float cubeRoot(float val);
+
+/** @brief Calculates the angle of a 2D vector in degrees.
+
+ The function fastAtan2 calculates the full-range angle of an input 2D vector. The angle is measured
+ in degrees and varies from 0 to 360 degrees. The accuracy is about 0.3 degrees.
+ @param y y-coordinate of the vector.
+ @param x x-coordinate of the vector.
+ */
+CV_EXPORTS_W float fastAtan2(float y, float x);

 ////////////////// forward declarations for important OpenCV types //////////////////

--- a/modules/core/include/opencv2/core/matx.hpp
+++ b/modules/core/include/opencv2/core/matx.hpp
@@ -427,7 +427,7 @@ template<typename _Tp, int m> struct Matx_DetOp
    double operator ()(const Matx<_Tp, m, m>& a) const
    {
        Matx<_Tp, m, m> temp = a;
-        double p = LU(temp.val, m*sizeof(_Tp), m, 0, 0, 0);
+        double p = hal::LU(temp.val, m*sizeof(_Tp), m, 0, 0, 0);
        if( p == 0 )
            return p;
        for( int i = 0; i < m; i++ )
--- a/modules/core/include/opencv2/core/operations.hpp
+++ b/modules/core/include/opencv2/core/operations.hpp
@@ -72,9 +72,9 @@ template<typename _Tp, int m> struct Matx_FastInvOp
            b(i, i) = (_Tp)1;

        if( method == DECOMP_CHOLESKY )
-            return Cholesky(temp.val, m*sizeof(_Tp), m, b.val, m*sizeof(_Tp), m);
+            return hal::Cholesky(temp.val, m*sizeof(_Tp), m, b.val, m*sizeof(_Tp), m);

-        return LU(temp.val, m*sizeof(_Tp), m, b.val, m*sizeof(_Tp), m) != 0;
+        return hal::LU(temp.val, m*sizeof(_Tp), m, b.val, m*sizeof(_Tp), m) != 0;
    }
 };