fully implemented SSE and NEON cases of intrin.hpp; extended the HAL with some basic math functions
This commit is contained in:
@@ -50,168 +50,6 @@
|
||||
namespace cv
|
||||
{
|
||||
|
||||
/****************************************************************************************\
|
||||
* LU & Cholesky implementation for small matrices *
|
||||
\****************************************************************************************/
|
||||
|
||||
template<typename _Tp> static inline int
|
||||
LUImpl(_Tp* A, size_t astep, int m, _Tp* b, size_t bstep, int n)
|
||||
{
|
||||
int i, j, k, p = 1;
|
||||
astep /= sizeof(A[0]);
|
||||
bstep /= sizeof(b[0]);
|
||||
|
||||
for( i = 0; i < m; i++ )
|
||||
{
|
||||
k = i;
|
||||
|
||||
for( j = i+1; j < m; j++ )
|
||||
if( std::abs(A[j*astep + i]) > std::abs(A[k*astep + i]) )
|
||||
k = j;
|
||||
|
||||
if( std::abs(A[k*astep + i]) < std::numeric_limits<_Tp>::epsilon() )
|
||||
return 0;
|
||||
|
||||
if( k != i )
|
||||
{
|
||||
for( j = i; j < m; j++ )
|
||||
std::swap(A[i*astep + j], A[k*astep + j]);
|
||||
if( b )
|
||||
for( j = 0; j < n; j++ )
|
||||
std::swap(b[i*bstep + j], b[k*bstep + j]);
|
||||
p = -p;
|
||||
}
|
||||
|
||||
_Tp d = -1/A[i*astep + i];
|
||||
|
||||
for( j = i+1; j < m; j++ )
|
||||
{
|
||||
_Tp alpha = A[j*astep + i]*d;
|
||||
|
||||
for( k = i+1; k < m; k++ )
|
||||
A[j*astep + k] += alpha*A[i*astep + k];
|
||||
|
||||
if( b )
|
||||
for( k = 0; k < n; k++ )
|
||||
b[j*bstep + k] += alpha*b[i*bstep + k];
|
||||
}
|
||||
|
||||
A[i*astep + i] = -d;
|
||||
}
|
||||
|
||||
if( b )
|
||||
{
|
||||
for( i = m-1; i >= 0; i-- )
|
||||
for( j = 0; j < n; j++ )
|
||||
{
|
||||
_Tp s = b[i*bstep + j];
|
||||
for( k = i+1; k < m; k++ )
|
||||
s -= A[i*astep + k]*b[k*bstep + j];
|
||||
b[i*bstep + j] = s*A[i*astep + i];
|
||||
}
|
||||
}
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
|
||||
int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n)
|
||||
{
|
||||
return LUImpl(A, astep, m, b, bstep, n);
|
||||
}
|
||||
|
||||
|
||||
int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n)
|
||||
{
|
||||
return LUImpl(A, astep, m, b, bstep, n);
|
||||
}
|
||||
|
||||
|
||||
template<typename _Tp> static inline bool
|
||||
CholImpl(_Tp* A, size_t astep, int m, _Tp* b, size_t bstep, int n)
|
||||
{
|
||||
_Tp* L = A;
|
||||
int i, j, k;
|
||||
double s;
|
||||
astep /= sizeof(A[0]);
|
||||
bstep /= sizeof(b[0]);
|
||||
|
||||
for( i = 0; i < m; i++ )
|
||||
{
|
||||
for( j = 0; j < i; j++ )
|
||||
{
|
||||
s = A[i*astep + j];
|
||||
for( k = 0; k < j; k++ )
|
||||
s -= L[i*astep + k]*L[j*astep + k];
|
||||
L[i*astep + j] = (_Tp)(s*L[j*astep + j]);
|
||||
}
|
||||
s = A[i*astep + i];
|
||||
for( k = 0; k < j; k++ )
|
||||
{
|
||||
double t = L[i*astep + k];
|
||||
s -= t*t;
|
||||
}
|
||||
if( s < std::numeric_limits<_Tp>::epsilon() )
|
||||
return false;
|
||||
L[i*astep + i] = (_Tp)(1./std::sqrt(s));
|
||||
}
|
||||
|
||||
if( !b )
|
||||
return true;
|
||||
|
||||
// LLt x = b
|
||||
// 1: L y = b
|
||||
// 2. Lt x = y
|
||||
|
||||
/*
|
||||
[ L00 ] y0 b0
|
||||
[ L10 L11 ] y1 = b1
|
||||
[ L20 L21 L22 ] y2 b2
|
||||
[ L30 L31 L32 L33 ] y3 b3
|
||||
|
||||
[ L00 L10 L20 L30 ] x0 y0
|
||||
[ L11 L21 L31 ] x1 = y1
|
||||
[ L22 L32 ] x2 y2
|
||||
[ L33 ] x3 y3
|
||||
*/
|
||||
|
||||
for( i = 0; i < m; i++ )
|
||||
{
|
||||
for( j = 0; j < n; j++ )
|
||||
{
|
||||
s = b[i*bstep + j];
|
||||
for( k = 0; k < i; k++ )
|
||||
s -= L[i*astep + k]*b[k*bstep + j];
|
||||
b[i*bstep + j] = (_Tp)(s*L[i*astep + i]);
|
||||
}
|
||||
}
|
||||
|
||||
for( i = m-1; i >= 0; i-- )
|
||||
{
|
||||
for( j = 0; j < n; j++ )
|
||||
{
|
||||
s = b[i*bstep + j];
|
||||
for( k = m-1; k > i; k-- )
|
||||
s -= L[k*astep + i]*b[k*bstep + j];
|
||||
b[i*bstep + j] = (_Tp)(s*L[i*astep + i]);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n)
|
||||
{
|
||||
return CholImpl(A, astep, m, b, bstep, n);
|
||||
}
|
||||
|
||||
bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n)
|
||||
{
|
||||
return CholImpl(A, astep, m, b, bstep, n);
|
||||
}
|
||||
|
||||
|
||||
template<typename _Tp> static inline _Tp hypot(_Tp a, _Tp b)
|
||||
{
|
||||
a = std::abs(a);
|
||||
@@ -882,7 +720,7 @@ double cv::determinant( InputArray _mat )
|
||||
Mat a(rows, rows, CV_32F, (uchar*)buffer);
|
||||
mat.copyTo(a);
|
||||
|
||||
result = LU(a.ptr<float>(), a.step, rows, 0, 0, 0);
|
||||
result = hal::LU(a.ptr<float>(), a.step, rows, 0, 0, 0);
|
||||
if( result )
|
||||
{
|
||||
for( int i = 0; i < rows; i++ )
|
||||
@@ -906,7 +744,7 @@ double cv::determinant( InputArray _mat )
|
||||
Mat a(rows, rows, CV_64F, (uchar*)buffer);
|
||||
mat.copyTo(a);
|
||||
|
||||
result = LU(a.ptr<double>(), a.step, rows, 0, 0, 0);
|
||||
result = hal::LU(a.ptr<double>(), a.step, rows, 0, 0, 0);
|
||||
if( result )
|
||||
{
|
||||
for( int i = 0; i < rows; i++ )
|
||||
@@ -1169,13 +1007,13 @@ double cv::invert( InputArray _src, OutputArray _dst, int method )
|
||||
setIdentity(dst);
|
||||
|
||||
if( method == DECOMP_LU && type == CV_32F )
|
||||
result = LU(src1.ptr<float>(), src1.step, n, dst.ptr<float>(), dst.step, n) != 0;
|
||||
result = hal::LU(src1.ptr<float>(), src1.step, n, dst.ptr<float>(), dst.step, n) != 0;
|
||||
else if( method == DECOMP_LU && type == CV_64F )
|
||||
result = LU(src1.ptr<double>(), src1.step, n, dst.ptr<double>(), dst.step, n) != 0;
|
||||
result = hal::LU(src1.ptr<double>(), src1.step, n, dst.ptr<double>(), dst.step, n) != 0;
|
||||
else if( method == DECOMP_CHOLESKY && type == CV_32F )
|
||||
result = Cholesky(src1.ptr<float>(), src1.step, n, dst.ptr<float>(), dst.step, n);
|
||||
result = hal::Cholesky(src1.ptr<float>(), src1.step, n, dst.ptr<float>(), dst.step, n);
|
||||
else
|
||||
result = Cholesky(src1.ptr<double>(), src1.step, n, dst.ptr<double>(), dst.step, n);
|
||||
result = hal::Cholesky(src1.ptr<double>(), src1.step, n, dst.ptr<double>(), dst.step, n);
|
||||
|
||||
if( !result )
|
||||
dst = Scalar(0);
|
||||
@@ -1407,16 +1245,16 @@ bool cv::solve( InputArray _src, InputArray _src2arg, OutputArray _dst, int meth
|
||||
if( method == DECOMP_LU )
|
||||
{
|
||||
if( type == CV_32F )
|
||||
result = LU(a.ptr<float>(), a.step, n, dst.ptr<float>(), dst.step, nb) != 0;
|
||||
result = hal::LU(a.ptr<float>(), a.step, n, dst.ptr<float>(), dst.step, nb) != 0;
|
||||
else
|
||||
result = LU(a.ptr<double>(), a.step, n, dst.ptr<double>(), dst.step, nb) != 0;
|
||||
result = hal::LU(a.ptr<double>(), a.step, n, dst.ptr<double>(), dst.step, nb) != 0;
|
||||
}
|
||||
else if( method == DECOMP_CHOLESKY )
|
||||
{
|
||||
if( type == CV_32F )
|
||||
result = Cholesky(a.ptr<float>(), a.step, n, dst.ptr<float>(), dst.step, nb);
|
||||
result = hal::Cholesky(a.ptr<float>(), a.step, n, dst.ptr<float>(), dst.step, nb);
|
||||
else
|
||||
result = Cholesky(a.ptr<double>(), a.step, n, dst.ptr<double>(), dst.step, nb);
|
||||
result = hal::Cholesky(a.ptr<double>(), a.step, n, dst.ptr<double>(), dst.step, nb);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user