From e48a456d4887940213b49c392de448a5dcebc398 Mon Sep 17 00:00:00 2001 From: Vadim Pisarevsky Date: Mon, 30 Aug 2010 16:37:22 +0000 Subject: [PATCH] optimized lapack' SVD for noticeably better performance on small matrices --- 3rdparty/include/cblas.h | 19 +- 3rdparty/include/clapack.h | 7 - 3rdparty/include/f2c.h | 5 + 3rdparty/lapack/dgemv.c | 312 ----- 3rdparty/lapack/dgemv_custom.c | 238 ++++ 3rdparty/lapack/{dger.c => dger_custom.c} | 153 +-- 3rdparty/lapack/dlamch.c | 1047 ----------------- 3rdparty/lapack/dlamch_custom.c | 58 + 3rdparty/lapack/{dlartg.c => dlartg_custom.c} | 42 +- 3rdparty/lapack/{dlasr.c => dlasr_custom.c} | 0 3rdparty/lapack/ilaenv.c | 654 ---------- 3rdparty/lapack/ilaenv_custom.c | 191 +++ 3rdparty/lapack/sgemv.c | 312 ----- 3rdparty/lapack/sgemv_custom.c | 204 ++++ 3rdparty/lapack/{sger.c => sger_custom.c} | 161 ++- 3rdparty/lapack/slamch.c | 1045 ---------------- 3rdparty/lapack/slamch_custom.c | 88 ++ 3rdparty/lapack/{slartg.c => slartg_custom.c} | 41 +- 3rdparty/lapack/{slasr.c => slasr_custom.c} | 0 19 files changed, 957 insertions(+), 3620 deletions(-) delete mode 100644 3rdparty/lapack/dgemv.c create mode 100644 3rdparty/lapack/dgemv_custom.c rename 3rdparty/lapack/{dger.c => dger_custom.c} (56%) delete mode 100644 3rdparty/lapack/dlamch.c create mode 100644 3rdparty/lapack/dlamch_custom.c rename 3rdparty/lapack/{dlartg.c => dlartg_custom.c} (80%) rename 3rdparty/lapack/{dlasr.c => dlasr_custom.c} (100%) delete mode 100644 3rdparty/lapack/ilaenv.c create mode 100644 3rdparty/lapack/ilaenv_custom.c delete mode 100644 3rdparty/lapack/sgemv.c create mode 100644 3rdparty/lapack/sgemv_custom.c rename 3rdparty/lapack/{sger.c => sger_custom.c} (52%) delete mode 100644 3rdparty/lapack/slamch.c create mode 100644 3rdparty/lapack/slamch_custom.c rename 3rdparty/lapack/{slartg.c => slartg_custom.c} (80%) rename 3rdparty/lapack/{slasr.c => slasr_custom.c} (100%) diff --git a/3rdparty/include/cblas.h b/3rdparty/include/cblas.h index 26f07c069..883475891 100644 --- a/3rdparty/include/cblas.h +++ b/3rdparty/include/cblas.h @@ -37,11 +37,28 @@ static __inline double r_sign(real *a, real *b) return *b >= 0 ? x : -x; } +extern const unsigned char lapack_toupper_tab[]; +#define lapack_toupper(c) ((char)lapack_toupper_tab[(unsigned char)(c)]) + +extern const unsigned char lapack_lamch_tab[]; +extern const doublereal lapack_dlamch_tab[]; +extern const doublereal lapack_slamch_tab[]; + static __inline logical lsame_(char *ca, char *cb) { - return toupper(ca[0]) == toupper(cb[0]); + return lapack_toupper(ca[0]) == lapack_toupper(cb[0]); } +static __inline doublereal dlamch_(char* cmach) +{ + return lapack_dlamch_tab[lapack_lamch_tab[(unsigned char)cmach[0]]]; +} + +static __inline doublereal slamch_(char* cmach) +{ + return lapack_slamch_tab[lapack_lamch_tab[(unsigned char)cmach[0]]]; +} + static __inline integer i_nint(real *x) { return (integer)(*x >= 0 ? floor(*x + .5) : -floor(.5 - *x)); diff --git a/3rdparty/include/clapack.h b/3rdparty/include/clapack.h index 1be8351a0..6d14714c2 100644 --- a/3rdparty/include/clapack.h +++ b/3rdparty/include/clapack.h @@ -3680,8 +3680,6 @@ doublereal dsecnd_(); doublereal second_(); -doublereal slamch_(char *cmach); - /* Subroutine */ int slamc1_(integer *beta, integer *t, logical *rnd, logical *ieee1); @@ -3696,8 +3694,6 @@ doublereal slamc3_(real *a, real *b); logical *ieee, integer *emax, real *rmax); -doublereal dlamch_(char *cmach); - /* Subroutine */ int dlamc1_(integer *beta, integer *t, logical *rnd, logical *ieee1); @@ -3712,9 +3708,6 @@ doublereal dlamc3_(doublereal *a, doublereal *b); /* Subroutine */ int dlamc5_(integer *beta, integer *p, integer *emin, logical *ieee, integer *emax, doublereal *rmax); -integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, - integer *n2, integer *n3, integer *n4); - #ifdef __cplusplus } #endif diff --git a/3rdparty/include/f2c.h b/3rdparty/include/f2c.h index 09a4f7d79..006efa498 100644 --- a/3rdparty/include/f2c.h +++ b/3rdparty/include/f2c.h @@ -7,6 +7,7 @@ #ifndef F2C_INCLUDE #define F2C_INCLUDE +#include #include #include #include @@ -17,6 +18,10 @@ #include #include +#if __SSE2__ || defined _M_X64 +#include "emmintrin.h" +#endif + #ifdef __cplusplus extern "C" { #endif diff --git a/3rdparty/lapack/dgemv.c b/3rdparty/lapack/dgemv.c deleted file mode 100644 index 696fd0129..000000000 --- a/3rdparty/lapack/dgemv.c +++ /dev/null @@ -1,312 +0,0 @@ -/* dgemv.f -- translated by f2c (version 20061008). - You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - - http://www.netlib.org/f2c/libf2c.zip -*/ - -#include "clapack.h" - - -/* Subroutine */ int dgemv_(char *trans, integer *m, integer *n, doublereal * - alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, - doublereal *beta, doublereal *y, integer *incy) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - - /* Local variables */ - integer i__, j, ix, iy, jx, jy, kx, ky, info; - doublereal temp; - integer lenx, leny; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* DGEMV performs one of the matrix-vector operations */ - -/* y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, */ - -/* where alpha and beta are scalars, x and y are vectors and A is an */ -/* m by n matrix. */ - -/* Arguments */ -/* ========== */ - -/* TRANS - CHARACTER*1. */ -/* On entry, TRANS specifies the operation to be performed as */ -/* follows: */ - -/* TRANS = 'N' or 'n' y := alpha*A*x + beta*y. */ - -/* TRANS = 'T' or 't' y := alpha*A'*x + beta*y. */ - -/* TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. */ - -/* Unchanged on exit. */ - -/* M - INTEGER. */ -/* On entry, M specifies the number of rows of the matrix A. */ -/* M must be at least zero. */ -/* Unchanged on exit. */ - -/* N - INTEGER. */ -/* On entry, N specifies the number of columns of the matrix A. */ -/* N must be at least zero. */ -/* Unchanged on exit. */ - -/* ALPHA - DOUBLE PRECISION. */ -/* On entry, ALPHA specifies the scalar alpha. */ -/* Unchanged on exit. */ - -/* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ -/* Before entry, the leading m by n part of the array A must */ -/* contain the matrix of coefficients. */ -/* Unchanged on exit. */ - -/* LDA - INTEGER. */ -/* On entry, LDA specifies the first dimension of A as declared */ -/* in the calling (sub) program. LDA must be at least */ -/* max( 1, m ). */ -/* Unchanged on exit. */ - -/* X - DOUBLE PRECISION array of DIMENSION at least */ -/* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */ -/* and at least */ -/* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */ -/* Before entry, the incremented array X must contain the */ -/* vector x. */ -/* Unchanged on exit. */ - -/* INCX - INTEGER. */ -/* On entry, INCX specifies the increment for the elements of */ -/* X. INCX must not be zero. */ -/* Unchanged on exit. */ - -/* BETA - DOUBLE PRECISION. */ -/* On entry, BETA specifies the scalar beta. When BETA is */ -/* supplied as zero then Y need not be set on input. */ -/* Unchanged on exit. */ - -/* Y - DOUBLE PRECISION array of DIMENSION at least */ -/* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */ -/* and at least */ -/* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */ -/* Before entry with BETA non-zero, the incremented array Y */ -/* must contain the vector y. On exit, Y is overwritten by the */ -/* updated vector y. */ - -/* INCY - INTEGER. */ -/* On entry, INCY specifies the increment for the elements of */ -/* Y. INCY must not be zero. */ -/* Unchanged on exit. */ - - -/* Level 2 Blas routine. */ - -/* -- Written on 22-October-1986. */ -/* Jack Dongarra, Argonne National Lab. */ -/* Jeremy Du Croz, Nag Central Office. */ -/* Sven Hammarling, Nag Central Office. */ -/* Richard Hanson, Sandia National Labs. */ - - -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. External Subroutines .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ - -/* Test the input parameters. */ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; - --y; - - /* Function Body */ - info = 0; - if (! lsame_(trans, "N") && ! lsame_(trans, "T") && ! lsame_(trans, "C") - ) { - info = 1; - } else if (*m < 0) { - info = 2; - } else if (*n < 0) { - info = 3; - } else if (*lda < max(1,*m)) { - info = 6; - } else if (*incx == 0) { - info = 8; - } else if (*incy == 0) { - info = 11; - } - if (info != 0) { - xerbla_("DGEMV ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*m == 0 || *n == 0 || *alpha == 0. && *beta == 1.) { - return 0; - } - -/* Set LENX and LENY, the lengths of the vectors x and y, and set */ -/* up the start points in X and Y. */ - - if (lsame_(trans, "N")) { - lenx = *n; - leny = *m; - } else { - lenx = *m; - leny = *n; - } - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (lenx - 1) * *incx; - } - if (*incy > 0) { - ky = 1; - } else { - ky = 1 - (leny - 1) * *incy; - } - -/* Start the operations. In this version the elements of A are */ -/* accessed sequentially with one pass through A. */ - -/* First form y := beta*y. */ - - if (*beta != 1.) { - if (*incy == 1) { - if (*beta == 0.) { - i__1 = leny; - for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = 0.; -/* L10: */ - } - } else { - i__1 = leny; - for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = *beta * y[i__]; -/* L20: */ - } - } - } else { - iy = ky; - if (*beta == 0.) { - i__1 = leny; - for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = 0.; - iy += *incy; -/* L30: */ - } - } else { - i__1 = leny; - for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = *beta * y[iy]; - iy += *incy; -/* L40: */ - } - } - } - } - if (*alpha == 0.) { - return 0; - } - if (lsame_(trans, "N")) { - -/* Form y := alpha*A*x + y. */ - - jx = kx; - if (*incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (x[jx] != 0.) { - temp = *alpha * x[jx]; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - y[i__] += temp * a[i__ + j * a_dim1]; -/* L50: */ - } - } - jx += *incx; -/* L60: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (x[jx] != 0.) { - temp = *alpha * x[jx]; - iy = ky; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - y[iy] += temp * a[i__ + j * a_dim1]; - iy += *incy; -/* L70: */ - } - } - jx += *incx; -/* L80: */ - } - } - } else { - -/* Form y := alpha*A'*x + y. */ - - jy = ky; - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp = 0.; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp += a[i__ + j * a_dim1] * x[i__]; -/* L90: */ - } - y[jy] += *alpha * temp; - jy += *incy; -/* L100: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp = 0.; - ix = kx; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp += a[i__ + j * a_dim1] * x[ix]; - ix += *incx; -/* L110: */ - } - y[jy] += *alpha * temp; - jy += *incy; -/* L120: */ - } - } - } - - return 0; - -/* End of DGEMV . */ - -} /* dgemv_ */ diff --git a/3rdparty/lapack/dgemv_custom.c b/3rdparty/lapack/dgemv_custom.c new file mode 100644 index 000000000..6edee4e2b --- /dev/null +++ b/3rdparty/lapack/dgemv_custom.c @@ -0,0 +1,238 @@ +#include "clapack.h" + + +/* Subroutine */ int dgemv_(char *_trans, integer *_m, integer *_n, doublereal * + _alpha, doublereal *a, integer *_lda, doublereal *x, integer *_incx, + doublereal *_beta, doublereal *y, integer *_incy) +{ +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGEMV performs one of the matrix-vector operations */ + +/* y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, */ + +/* where alpha and beta are scalars, x and y are vectors and A is an */ +/* m by n matrix. */ + +/* Arguments */ +/* ========== */ + +/* TRANS - CHARACTER*1. */ +/* On entry, TRANS specifies the operation to be performed as */ +/* follows: */ + +/* TRANS = 'N' or 'n' y := alpha*A*x + beta*y. */ + +/* TRANS = 'T' or 't' y := alpha*A'*x + beta*y. */ + +/* TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. */ + +/* Unchanged on exit. */ + +/* M - INTEGER. */ +/* On entry, M specifies the number of rows of the matrix A. */ +/* M must be at least zero. */ +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the number of columns of the matrix A. */ +/* N must be at least zero. */ +/* Unchanged on exit. */ + +/* ALPHA - DOUBLE PRECISION. */ +/* On entry, ALPHA specifies the scalar alpha. */ +/* Unchanged on exit. */ + +/* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ +/* Before entry, the leading m by n part of the array A must */ +/* contain the matrix of coefficients. */ +/* Unchanged on exit. */ + +/* LDA - INTEGER. */ +/* On entry, LDA specifies the first dimension of A as declared */ +/* in the calling (sub) program. LDA must be at least */ +/* max( 1, m ). */ +/* Unchanged on exit. */ + +/* X - DOUBLE PRECISION array of DIMENSION at least */ +/* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */ +/* and at least */ +/* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */ +/* Before entry, the incremented array X must contain the */ +/* vector x. */ +/* Unchanged on exit. */ + +/* INCX - INTEGER. */ +/* On entry, INCX specifies the increment for the elements of */ +/* X. INCX must not be zero. */ +/* Unchanged on exit. */ + +/* BETA - DOUBLE PRECISION. */ +/* On entry, BETA specifies the scalar beta. When BETA is */ +/* supplied as zero then Y need not be set on input. */ +/* Unchanged on exit. */ + +/* Y - DOUBLE PRECISION array of DIMENSION at least */ +/* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */ +/* and at least */ +/* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */ +/* Before entry with BETA non-zero, the incremented array Y */ +/* must contain the vector y. On exit, Y is overwritten by the */ +/* updated vector y. */ + +/* INCY - INTEGER. */ +/* On entry, INCY specifies the increment for the elements of */ +/* Y. INCY must not be zero. */ +/* Unchanged on exit. */ + + +/* Level 2 Blas routine. */ + +/* -- Written on 22-October-1986. */ +/* Jack Dongarra, Argonne National Lab. */ +/* Jeremy Du Croz, Nag Central Office. */ +/* Sven Hammarling, Nag Central Office. */ +/* Richard Hanson, Sandia National Labs. */ + + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. */ + + char trans = lapack_toupper(_trans[0]); + integer i, j, m = *_m, n = *_n, lda = *_lda, incx = *_incx, incy = *_incy; + integer leny = trans == 'N' ? m : n, lenx = trans == 'N' ? n : m; + real alpha = *_alpha, beta = *_beta; + + integer info = 0; + if (trans != 'N' && trans != 'T' && trans != 'C') + info = 1; + else if (m < 0) + info = 2; + else if (n < 0) + info = 3; + else if (lda < max(1,m)) + info = 6; + else if (incx == 0) + info = 8; + else if (incy == 0) + info = 11; + + if (info != 0) + { + xerbla_("SGEMV ", &info); + return 0; + } + + if( incy < 0 ) + y -= incy*(leny - 1); + if( incx < 0 ) + x -= incx*(lenx - 1); + + /* Start the operations. In this version the elements of A are */ + /* accessed sequentially with one pass through A. */ + + if( beta != 1. ) + { + if( incy == 1 ) + { + if( beta == 0. ) + for( i = 0; i < leny; i++ ) + y[i] = 0.; + else + for( i = 0; i < leny; i++ ) + y[i] *= beta; + } + else + { + if( beta == 0. ) + for( i = 0; i < leny; i++ ) + y[i*incy] = 0.; + else + for( i = 0; i < leny; i++ ) + y[i*incy] *= beta; + } + } + + if( alpha == 0. ) + ; + else if( trans == 'N' ) + { + if( incy == 1 ) + { + for( i = 0; i < n; i++, a += lda ) + { + doublereal s = x[i*incx]; + if( s == 0. ) + continue; + s *= alpha; + for( j = 0; j <= m - 2; j += 2 ) + { + doublereal t0 = y[j] + s*a[j]; + doublereal t1 = y[j+1] + s*a[j+1]; + y[j] = t0; y[j+1] = t1; + } + + for( ; j < m; j++ ) + y[j] += s*a[j]; + } + } + else + { + for( i = 0; i < n; i++, a += lda ) + { + doublereal s = x[i*incx]; + if( s == 0. ) + continue; + s *= alpha; + for( j = 0; j < m; j++ ) + y[j*incy] += s*a[j]; + } + } + } + else + { + if( incx == 1 ) + { + for( i = 0; i < n; i++, a += lda ) + { + doublereal s = 0; + for( j = 0; j <= m - 2; j += 2 ) + s += x[j]*a[j] + x[j+1]*a[j+1]; + for( ; j < m; j++ ) + s += x[j]*a[j]; + y[i*incy] += alpha*s; + } + } + else + { + for( i = 0; i < n; i++, a += lda ) + { + doublereal s = 0; + for( j = 0; j < m; j++ ) + s += x[j*incx]*a[j]; + y[i*incy] += alpha*s; + } + } + } + + return 0; + +/* End of DGEMV . */ + +} /* dgemv_ */ diff --git a/3rdparty/lapack/dger.c b/3rdparty/lapack/dger_custom.c similarity index 56% rename from 3rdparty/lapack/dger.c rename to 3rdparty/lapack/dger_custom.c index e581c8a8c..4296e02d3 100644 --- a/3rdparty/lapack/dger.c +++ b/3rdparty/lapack/dger_custom.c @@ -1,29 +1,10 @@ -/* dger.f -- translated by f2c (version 20061008). - You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - - http://www.netlib.org/f2c/libf2c.zip -*/ - #include "clapack.h" -/* Subroutine */ int dger_(integer *m, integer *n, doublereal *alpha, - doublereal *x, integer *incx, doublereal *y, integer *incy, - doublereal *a, integer *lda) +/* Subroutine */ int dger_(integer *_m, integer *_n, doublereal *_alpha, + doublereal *x, integer *_incx, doublereal *y, integer *_incy, + doublereal *a, integer *_lda) { - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - - /* Local variables */ - integer i__, j, ix, jy, kx, info; - doublereal temp; - extern /* Subroutine */ int xerbla_(char *, integer *); /* .. Scalar Arguments .. */ /* .. */ @@ -111,80 +92,70 @@ /* Test the input parameters. */ - /* Parameter adjustments */ - --x; - --y; - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - /* Function Body */ - info = 0; - if (*m < 0) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*incx == 0) { - info = 5; - } else if (*incy == 0) { - info = 7; - } else if (*lda < max(1,*m)) { - info = 9; - } - if (info != 0) { - xerbla_("DGER ", &info); - return 0; + integer i, j, m = *_m, n = *_n, incx = *_incx, incy = *_incy, lda = *_lda; + doublereal alpha = *_alpha; + integer info = 0; + + if (m < 0) + info = 1; + else if (n < 0) + info = 2; + else if (incx == 0) + info = 5; + else if (incy == 0) + info = 7; + else if (lda < max(1,m)) + info = 9; + + if (info != 0) + { + xerbla_("DGER ", &info); + return 0; } -/* Quick return if possible. */ + if (incx < 0) + x -= (m-1)*incx; + if (incy < 0) + y -= (n-1)*incy; - if (*m == 0 || *n == 0 || *alpha == 0.) { - return 0; + /* Start the operations. In this version the elements of A are */ + /* accessed sequentially with one pass through A. */ + + if( alpha == 0 ) + ; + else if( incx == 1 ) + { + for( j = 0; j < n; j++, a += lda ) + { + doublereal s = y[j*incy]; + if( s == 0 ) + continue; + s *= alpha; + + for( i = 0; i <= m - 2; i += 2 ) + { + doublereal t0 = a[i] + x[i]*s; + doublereal t1 = a[i+1] + x[i+1]*s; + a[i] = t0; a[i+1] = t1; + } + + for( ; i < m; i++ ) + a[i] += x[i]*s; + } } - -/* Start the operations. In this version the elements of A are */ -/* accessed sequentially with one pass through A. */ - - if (*incy > 0) { - jy = 1; - } else { - jy = 1 - (*n - 1) * *incy; - } - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (y[jy] != 0.) { - temp = *alpha * y[jy]; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] += x[i__] * temp; -/* L10: */ - } - } - jy += *incy; -/* L20: */ - } - } else { - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (*m - 1) * *incx; - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (y[jy] != 0.) { - temp = *alpha * y[jy]; - ix = kx; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] += x[ix] * temp; - ix += *incx; -/* L30: */ - } - } - jy += *incy; -/* L40: */ - } + else + { + for( j = 0; j < n; j++, a += lda ) + { + doublereal s = y[j*incy]; + if( s == 0 ) + continue; + s *= alpha; + + for( i = 0; i < m; i++ ) + a[i] += x[i*incx]*s; + } } return 0; diff --git a/3rdparty/lapack/dlamch.c b/3rdparty/lapack/dlamch.c deleted file mode 100644 index d9259a73b..000000000 --- a/3rdparty/lapack/dlamch.c +++ /dev/null @@ -1,1047 +0,0 @@ -#include "clapack.h" -#include -#include - -/* *********************************************************************** */ - -doublereal dlamc3_(doublereal *a, doublereal *b) -{ - /* System generated locals */ - doublereal ret_val; - - - /* -- LAPACK auxiliary routine (version 3.1) -- */ - /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ - /* November 2006 */ - - /* .. Scalar Arguments .. */ - /* .. */ - - /* Purpose */ - /* ======= */ - - /* DLAMC3 is intended to force A and B to be stored prior to doing */ - /* the addition of A and B , for use in situations where optimizers */ - /* might hold one of these in a register. */ - - /* Arguments */ - /* ========= */ - - /* A (input) DOUBLE PRECISION */ - /* B (input) DOUBLE PRECISION */ - /* The values A and B. */ - - /* ===================================================================== */ - - /* .. Executable Statements .. */ - - ret_val = *a + *b; - - return ret_val; - - /* End of DLAMC3 */ - -} /* dlamc3_ */ - - -#if 1 - -/* simpler version of dlamch for the case of IEEE754-compliant FPU module by Piotr Luszczek S. - taken from http://www.mail-archive.com/numpy-discussion@lists.sourceforge.net/msg02448.html */ - -#ifndef DBL_DIGITS -#define DBL_DIGITS 53 -#endif - -doublereal -dlamch_(char *cmach) { - char ch = cmach[0]; - double eps = DBL_EPSILON, sfmin, small; - - if ('B' == ch || 'b' == ch) { - return FLT_RADIX; - } else if ('E' == ch || 'e' == ch) { - return eps; - } else if ('L' == ch || 'l' == ch) { - return DBL_MAX_EXP; - } else if ('M' == ch || 'm' == ch) { - return DBL_MIN_EXP; - } else if ('N' == ch || 'n' == ch) { - return DBL_DIGITS; - } else if ('O' == ch || 'o' == ch) { - return DBL_MAX; - } else if ('P' == ch || 'p' == ch) { - return eps * FLT_RADIX; - } else if ('R' == ch || 'r' == ch) { - return FLT_ROUNDS < 2; - } else if ('S' == ch || 's' == ch) { - /* Use SMALL plus a bit, to avoid the possibility of rounding causing overflow - when computing 1/sfmin. */ - sfmin = DBL_MIN; - small = 2. / DBL_MAX; - if (small <= sfmin) small = sfmin * (1 + eps); - return small; - } else if ('U' == ch || 'u' == ch) { - return DBL_MIN; - } - - return 0; -} - -#else - -/* Table of constant values */ - -static integer c__1 = 1; -static doublereal c_b32 = 0.; - -doublereal dlamch_(char *cmach) -{ - /* Initialized data */ - - static logical first = TRUE_; - - /* System generated locals */ - integer i__1; - doublereal ret_val; - - /* Builtin functions */ - double pow_di(doublereal *, integer *); - - /* Local variables */ - static doublereal t; - integer it; - static doublereal rnd, eps, base; - integer beta; - static doublereal emin, prec, emax; - integer imin, imax; - logical lrnd; - static doublereal rmin, rmax; - doublereal rmach; - extern logical lsame_(char *, char *); - doublereal small; - static doublereal sfmin; - extern /* Subroutine */ int dlamc2_(integer *, integer *, logical *, - doublereal *, integer *, doublereal *, integer *, doublereal *); - - -/* -- LAPACK auxiliary routine (version 3.1) -- */ -/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ -/* November 2006 */ - -/* .. Scalar Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* DLAMCH determines double precision machine parameters. */ - -/* Arguments */ -/* ========= */ - -/* CMACH (input) CHARACTER*1 */ -/* Specifies the value to be returned by DLAMCH: */ -/* = 'E' or 'e', DLAMCH := eps */ -/* = 'S' or 's , DLAMCH := sfmin */ -/* = 'B' or 'b', DLAMCH := base */ -/* = 'P' or 'p', DLAMCH := eps*base */ -/* = 'N' or 'n', DLAMCH := t */ -/* = 'R' or 'r', DLAMCH := rnd */ -/* = 'M' or 'm', DLAMCH := emin */ -/* = 'U' or 'u', DLAMCH := rmin */ -/* = 'L' or 'l', DLAMCH := emax */ -/* = 'O' or 'o', DLAMCH := rmax */ - -/* where */ - -/* eps = relative machine precision */ -/* sfmin = safe minimum, such that 1/sfmin does not overflow */ -/* base = base of the machine */ -/* prec = eps*base */ -/* t = number of (base) digits in the mantissa */ -/* rnd = 1.0 when rounding occurs in addition, 0.0 otherwise */ -/* emin = minimum exponent before (gradual) underflow */ -/* rmin = underflow threshold - base**(emin-1) */ -/* emax = largest exponent before overflow */ -/* rmax = overflow threshold - (base**emax)*(1-eps) */ - -/* ===================================================================== */ - -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. External Subroutines .. */ -/* .. */ -/* .. Save statement .. */ -/* .. */ -/* .. Data statements .. */ -/* .. */ -/* .. Executable Statements .. */ - - if (first) { - dlamc2_(&beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax); - base = (doublereal) beta; - t = (doublereal) it; - if (lrnd) { - rnd = 1.; - i__1 = 1 - it; - eps = pow_di(&base, &i__1) / 2; - } else { - rnd = 0.; - i__1 = 1 - it; - eps = pow_di(&base, &i__1); - } - prec = eps * base; - emin = (doublereal) imin; - emax = (doublereal) imax; - sfmin = rmin; - small = 1. / rmax; - if (small >= sfmin) { - -/* Use SMALL plus a bit, to avoid the possibility of rounding */ -/* causing overflow when computing 1/sfmin. */ - - sfmin = small * (eps + 1.); - } - } - - if (lsame_(cmach, "E")) { - rmach = eps; - } else if (lsame_(cmach, "S")) { - rmach = sfmin; - } else if (lsame_(cmach, "B")) { - rmach = base; - } else if (lsame_(cmach, "P")) { - rmach = prec; - } else if (lsame_(cmach, "N")) { - rmach = t; - } else if (lsame_(cmach, "R")) { - rmach = rnd; - } else if (lsame_(cmach, "M")) { - rmach = emin; - } else if (lsame_(cmach, "U")) { - rmach = rmin; - } else if (lsame_(cmach, "L")) { - rmach = emax; - } else if (lsame_(cmach, "O")) { - rmach = rmax; - } - - ret_val = rmach; - first = FALSE_; - return ret_val; - -/* End of DLAMCH */ - -} /* dlamch_ */ - - -/* *********************************************************************** */ - -/* Subroutine */ int dlamc1_(integer *beta, integer *t, logical *rnd, logical - *ieee1) -{ - /* Initialized data */ - - static logical first = TRUE_; - - /* System generated locals */ - doublereal d__1, d__2; - - /* Local variables */ - doublereal a, b, c__, f, t1, t2; - static integer lt; - doublereal one, qtr; - static logical lrnd; - static integer lbeta; - doublereal savec; - extern doublereal dlamc3_(doublereal *, doublereal *); - static logical lieee1; - - -/* -- LAPACK auxiliary routine (version 3.1) -- */ -/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ -/* November 2006 */ - -/* .. Scalar Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* DLAMC1 determines the machine parameters given by BETA, T, RND, and */ -/* IEEE1. */ - -/* Arguments */ -/* ========= */ - -/* BETA (output) INTEGER */ -/* The base of the machine. */ - -/* T (output) INTEGER */ -/* The number of ( BETA ) digits in the mantissa. */ - -/* RND (output) LOGICAL */ -/* Specifies whether proper rounding ( RND = .TRUE. ) or */ -/* chopping ( RND = .FALSE. ) occurs in addition. This may not */ -/* be a reliable guide to the way in which the machine performs */ -/* its arithmetic. */ - -/* IEEE1 (output) LOGICAL */ -/* Specifies whether rounding appears to be done in the IEEE */ -/* 'round to nearest' style. */ - -/* Further Details */ -/* =============== */ - -/* The routine is based on the routine ENVRON by Malcolm and */ -/* incorporates suggestions by Gentleman and Marovich. See */ - -/* Malcolm M. A. (1972) Algorithms to reveal properties of */ -/* floating-point arithmetic. Comms. of the ACM, 15, 949-951. */ - -/* Gentleman W. M. and Marovich S. B. (1974) More on algorithms */ -/* that reveal properties of floating point arithmetic units. */ -/* Comms. of the ACM, 17, 276-277. */ - -/* ===================================================================== */ - -/* .. Local Scalars .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. Save statement .. */ -/* .. */ -/* .. Data statements .. */ -/* .. */ -/* .. Executable Statements .. */ - - if (first) { - one = 1.; - -/* LBETA, LIEEE1, LT and LRND are the local values of BETA, */ -/* IEEE1, T and RND. */ - -/* Throughout this routine we use the function DLAMC3 to ensure */ -/* that relevant values are stored and not held in registers, or */ -/* are not affected by optimizers. */ - -/* Compute a = 2.0**m with the smallest positive integer m such */ -/* that */ - -/* fl( a + 1.0 ) = a. */ - - a = 1.; - c__ = 1.; - -/* + WHILE( C.EQ.ONE )LOOP */ -L10: - if (c__ == one) { - a *= 2; - c__ = dlamc3_(&a, &one); - d__1 = -a; - c__ = dlamc3_(&c__, &d__1); - goto L10; - } -/* + END WHILE */ - -/* Now compute b = 2.0**m with the smallest positive integer m */ -/* such that */ - -/* fl( a + b ) .gt. a. */ - - b = 1.; - c__ = dlamc3_(&a, &b); - -/* + WHILE( C.EQ.A )LOOP */ -L20: - if (c__ == a) { - b *= 2; - c__ = dlamc3_(&a, &b); - goto L20; - } -/* + END WHILE */ - -/* Now compute the base. a and c are neighbouring floating point */ -/* numbers in the interval ( beta**t, beta**( t + 1 ) ) and so */ -/* their difference is beta. Adding 0.25 to c is to ensure that it */ -/* is truncated to beta and not ( beta - 1 ). */ - - qtr = one / 4; - savec = c__; - d__1 = -a; - c__ = dlamc3_(&c__, &d__1); - lbeta = (integer) (c__ + qtr); - -/* Now determine whether rounding or chopping occurs, by adding a */ -/* bit less than beta/2 and a bit more than beta/2 to a. */ - - b = (doublereal) lbeta; - d__1 = b / 2; - d__2 = -b / 100; - f = dlamc3_(&d__1, &d__2); - c__ = dlamc3_(&f, &a); - if (c__ == a) { - lrnd = TRUE_; - } else { - lrnd = FALSE_; - } - d__1 = b / 2; - d__2 = b / 100; - f = dlamc3_(&d__1, &d__2); - c__ = dlamc3_(&f, &a); - if (lrnd && c__ == a) { - lrnd = FALSE_; - } - -/* Try and decide whether rounding is done in the IEEE 'round to */ -/* nearest' style. B/2 is half a unit in the last place of the two */ -/* numbers A and SAVEC. Furthermore, A is even, i.e. has last bit */ -/* zero, and SAVEC is odd. Thus adding B/2 to A should not change */ -/* A, but adding B/2 to SAVEC should change SAVEC. */ - - d__1 = b / 2; - t1 = dlamc3_(&d__1, &a); - d__1 = b / 2; - t2 = dlamc3_(&d__1, &savec); - lieee1 = t1 == a && t2 > savec && lrnd; - -/* Now find the mantissa, t. It should be the integer part of */ -/* log to the base beta of a, however it is safer to determine t */ -/* by powering. So we find t as the smallest positive integer for */ -/* which */ - -/* fl( beta**t + 1.0 ) = 1.0. */ - - lt = 0; - a = 1.; - c__ = 1.; - -/* + WHILE( C.EQ.ONE )LOOP */ -L30: - if (c__ == one) { - ++lt; - a *= lbeta; - c__ = dlamc3_(&a, &one); - d__1 = -a; - c__ = dlamc3_(&c__, &d__1); - goto L30; - } -/* + END WHILE */ - - } - - *beta = lbeta; - *t = lt; - *rnd = lrnd; - *ieee1 = lieee1; - first = FALSE_; - return 0; - -/* End of DLAMC1 */ - -} /* dlamc1_ */ - - -/* *********************************************************************** */ - -/* Subroutine */ int dlamc2_(integer *beta, integer *t, logical *rnd, - doublereal *eps, integer *emin, doublereal *rmin, integer *emax, - doublereal *rmax) -{ - /* Initialized data */ - - static logical first = TRUE_; - static logical iwarn = FALSE_; - - /* Format strings */ - static char fmt_9999[] = "(//\002 WARNING. The value EMIN may be incorre" - "ct:-\002,\002 EMIN = \002,i8,/\002 If, after inspection, the va" - "lue EMIN looks\002,\002 acceptable please comment out \002,/\002" - " the IF block as marked within the code of routine\002,\002 DLAM" - "C2,\002,/\002 otherwise supply EMIN explicitly.\002,/)"; - - /* System generated locals */ - integer i__1; - doublereal d__1, d__2, d__3, d__4, d__5; - - /* Builtin functions */ - double pow_di(doublereal *, integer *); - //integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); - - /* Local variables */ - doublereal a, b, c__; - integer i__; - static integer lt; - doublereal one, two; - logical ieee; - doublereal half; - logical lrnd; - static doublereal leps; - doublereal zero; - static integer lbeta; - doublereal rbase; - static integer lemin, lemax; - integer gnmin; - doublereal small; - integer gpmin; - doublereal third; - static doublereal lrmin, lrmax; - doublereal sixth; - extern /* Subroutine */ int dlamc1_(integer *, integer *, logical *, - logical *); - extern doublereal dlamc3_(doublereal *, doublereal *); - logical lieee1; - extern /* Subroutine */ int dlamc4_(integer *, doublereal *, integer *), - dlamc5_(integer *, integer *, integer *, logical *, integer *, - doublereal *); - integer ngnmin, ngpmin; - - /* Fortran I/O blocks */ - static cilist io___58 = { 0, 6, 0, fmt_9999, 0 }; - - - -/* -- LAPACK auxiliary routine (version 3.1) -- */ -/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ -/* November 2006 */ - -/* .. Scalar Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* DLAMC2 determines the machine parameters specified in its argument */ -/* list. */ - -/* Arguments */ -/* ========= */ - -/* BETA (output) INTEGER */ -/* The base of the machine. */ - -/* T (output) INTEGER */ -/* The number of ( BETA ) digits in the mantissa. */ - -/* RND (output) LOGICAL */ -/* Specifies whether proper rounding ( RND = .TRUE. ) or */ -/* chopping ( RND = .FALSE. ) occurs in addition. This may not */ -/* be a reliable guide to the way in which the machine performs */ -/* its arithmetic. */ - -/* EPS (output) DOUBLE PRECISION */ -/* The smallest positive number such that */ - -/* fl( 1.0 - EPS ) .LT. 1.0, */ - -/* where fl denotes the computed value. */ - -/* EMIN (output) INTEGER */ -/* The minimum exponent before (gradual) underflow occurs. */ - -/* RMIN (output) DOUBLE PRECISION */ -/* The smallest normalized number for the machine, given by */ -/* BASE**( EMIN - 1 ), where BASE is the floating point value */ -/* of BETA. */ - -/* EMAX (output) INTEGER */ -/* The maximum exponent before overflow occurs. */ - -/* RMAX (output) DOUBLE PRECISION */ -/* The largest positive number for the machine, given by */ -/* BASE**EMAX * ( 1 - EPS ), where BASE is the floating point */ -/* value of BETA. */ - -/* Further Details */ -/* =============== */ - -/* The computation of EPS is based on a routine PARANOIA by */ -/* W. Kahan of the University of California at Berkeley. */ - -/* ===================================================================== */ - -/* .. Local Scalars .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. External Subroutines .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ -/* .. Save statement .. */ -/* .. */ -/* .. Data statements .. */ -/* .. */ -/* .. Executable Statements .. */ - - if (first) { - zero = 0.; - one = 1.; - two = 2.; - -/* LBETA, LT, LRND, LEPS, LEMIN and LRMIN are the local values of */ -/* BETA, T, RND, EPS, EMIN and RMIN. */ - -/* Throughout this routine we use the function DLAMC3 to ensure */ -/* that relevant values are stored and not held in registers, or */ -/* are not affected by optimizers. */ - -/* DLAMC1 returns the parameters LBETA, LT, LRND and LIEEE1. */ - - dlamc1_(&lbeta, <, &lrnd, &lieee1); - -/* Start to find EPS. */ - - b = (doublereal) lbeta; - i__1 = -lt; - a = pow_di(&b, &i__1); - leps = a; - -/* Try some tricks to see whether or not this is the correct EPS. */ - - b = two / 3; - half = one / 2; - d__1 = -half; - sixth = dlamc3_(&b, &d__1); - third = dlamc3_(&sixth, &sixth); - d__1 = -half; - b = dlamc3_(&third, &d__1); - b = dlamc3_(&b, &sixth); - b = abs(b); - if (b < leps) { - b = leps; - } - - leps = 1.; - -/* + WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP */ -L10: - if (leps > b && b > zero) { - leps = b; - d__1 = half * leps; -/* Computing 5th power */ - d__3 = two, d__4 = d__3, d__3 *= d__3; -/* Computing 2nd power */ - d__5 = leps; - d__2 = d__4 * (d__3 * d__3) * (d__5 * d__5); - c__ = dlamc3_(&d__1, &d__2); - d__1 = -c__; - c__ = dlamc3_(&half, &d__1); - b = dlamc3_(&half, &c__); - d__1 = -b; - c__ = dlamc3_(&half, &d__1); - b = dlamc3_(&half, &c__); - goto L10; - } -/* + END WHILE */ - - if (a < leps) { - leps = a; - } - -/* Computation of EPS complete. */ - -/* Now find EMIN. Let A = + or - 1, and + or - (1 + BASE**(-3)). */ -/* Keep dividing A by BETA until (gradual) underflow occurs. This */ -/* is detected when we cannot recover the previous A. */ - - rbase = one / lbeta; - small = one; - for (i__ = 1; i__ <= 3; ++i__) { - d__1 = small * rbase; - small = dlamc3_(&d__1, &zero); -/* L20: */ - } - a = dlamc3_(&one, &small); - dlamc4_(&ngpmin, &one, &lbeta); - d__1 = -one; - dlamc4_(&ngnmin, &d__1, &lbeta); - dlamc4_(&gpmin, &a, &lbeta); - d__1 = -a; - dlamc4_(&gnmin, &d__1, &lbeta); - ieee = FALSE_; - - if (ngpmin == ngnmin && gpmin == gnmin) { - if (ngpmin == gpmin) { - lemin = ngpmin; -/* ( Non twos-complement machines, no gradual underflow; */ -/* e.g., VAX ) */ - } else if (gpmin - ngpmin == 3) { - lemin = ngpmin - 1 + lt; - ieee = TRUE_; -/* ( Non twos-complement machines, with gradual underflow; */ -/* e.g., IEEE standard followers ) */ - } else { - lemin = min(ngpmin,gpmin); -/* ( A guess; no known machine ) */ - iwarn = TRUE_; - } - - } else if (ngpmin == gpmin && ngnmin == gnmin) { - if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1) { - lemin = max(ngpmin,ngnmin); -/* ( Twos-complement machines, no gradual underflow; */ -/* e.g., CYBER 205 ) */ - } else { - lemin = min(ngpmin,ngnmin); -/* ( A guess; no known machine ) */ - iwarn = TRUE_; - } - - } else if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1 && gpmin == gnmin) - { - if (gpmin - min(ngpmin,ngnmin) == 3) { - lemin = max(ngpmin,ngnmin) - 1 + lt; -/* ( Twos-complement machines with gradual underflow; */ -/* no known machine ) */ - } else { - lemin = min(ngpmin,ngnmin); -/* ( A guess; no known machine ) */ - iwarn = TRUE_; - } - - } else { -/* Computing MIN */ - i__1 = min(ngpmin,ngnmin), i__1 = min(i__1,gpmin); - lemin = min(i__1,gnmin); -/* ( A guess; no known machine ) */ - iwarn = TRUE_; - } - first = FALSE_; -/* ** */ -/* Comment out this if block if EMIN is ok */ - if (iwarn) { - first = TRUE_; - printf("\n\n WARNING. The value EMIN may be incorrect:- "); - printf("EMIN = %8i\n",lemin); - printf("If, after inspection, the value EMIN looks acceptable"); - printf("please comment out \n the IF block as marked within the"); - printf("code of routine DLAMC2, \n otherwise supply EMIN"); - printf("explicitly.\n"); - /* - s_wsfe(&io___58); - do_fio(&c__1, (char *)&lemin, (ftnlen)sizeof(integer)); - e_wsfe(); - */ - } -/* ** */ - -/* Assume IEEE arithmetic if we found denormalised numbers above, */ -/* or if arithmetic seems to round in the IEEE style, determined */ -/* in routine DLAMC1. A true IEEE machine should have both things */ -/* true; however, faulty machines may have one or the other. */ - - ieee = ieee || lieee1; - -/* Compute RMIN by successive division by BETA. We could compute */ -/* RMIN as BASE**( EMIN - 1 ), but some machines underflow during */ -/* this computation. */ - - lrmin = 1.; - i__1 = 1 - lemin; - for (i__ = 1; i__ <= i__1; ++i__) { - d__1 = lrmin * rbase; - lrmin = dlamc3_(&d__1, &zero); -/* L30: */ - } - -/* Finally, call DLAMC5 to compute EMAX and RMAX. */ - - dlamc5_(&lbeta, <, &lemin, &ieee, &lemax, &lrmax); - } - - *beta = lbeta; - *t = lt; - *rnd = lrnd; - *eps = leps; - *emin = lemin; - *rmin = lrmin; - *emax = lemax; - *rmax = lrmax; - - return 0; - - -/* End of DLAMC2 */ - -} /* dlamc2_ */ - - - -/* *********************************************************************** */ - -/* Subroutine */ int dlamc4_(integer *emin, doublereal *start, integer *base) -{ - /* System generated locals */ - integer i__1; - doublereal d__1; - - /* Local variables */ - doublereal a; - integer i__; - doublereal b1, b2, c1, c2, d1, d2, one, zero, rbase; - extern doublereal dlamc3_(doublereal *, doublereal *); - - -/* -- LAPACK auxiliary routine (version 3.1) -- */ -/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ -/* November 2006 */ - -/* .. Scalar Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* DLAMC4 is a service routine for DLAMC2. */ - -/* Arguments */ -/* ========= */ - -/* EMIN (output) INTEGER */ -/* The minimum exponent before (gradual) underflow, computed by */ -/* setting A = START and dividing by BASE until the previous A */ -/* can not be recovered. */ - -/* START (input) DOUBLE PRECISION */ -/* The starting point for determining EMIN. */ - -/* BASE (input) INTEGER */ -/* The base of the machine. */ - -/* ===================================================================== */ - -/* .. Local Scalars .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. Executable Statements .. */ - - a = *start; - one = 1.; - rbase = one / *base; - zero = 0.; - *emin = 1; - d__1 = a * rbase; - b1 = dlamc3_(&d__1, &zero); - c1 = a; - c2 = a; - d1 = a; - d2 = a; -/* + WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND. */ -/* $ ( D1.EQ.A ).AND.( D2.EQ.A ) )LOOP */ -L10: - if (c1 == a && c2 == a && d1 == a && d2 == a) { - --(*emin); - a = b1; - d__1 = a / *base; - b1 = dlamc3_(&d__1, &zero); - d__1 = b1 * *base; - c1 = dlamc3_(&d__1, &zero); - d1 = zero; - i__1 = *base; - for (i__ = 1; i__ <= i__1; ++i__) { - d1 += b1; -/* L20: */ - } - d__1 = a * rbase; - b2 = dlamc3_(&d__1, &zero); - d__1 = b2 / rbase; - c2 = dlamc3_(&d__1, &zero); - d2 = zero; - i__1 = *base; - for (i__ = 1; i__ <= i__1; ++i__) { - d2 += b2; -/* L30: */ - } - goto L10; - } -/* + END WHILE */ - - return 0; - -/* End of DLAMC4 */ - -} /* dlamc4_ */ - - -/* *********************************************************************** */ - -/* Subroutine */ int dlamc5_(integer *beta, integer *p, integer *emin, - logical *ieee, integer *emax, doublereal *rmax) -{ - /* System generated locals */ - integer i__1; - doublereal d__1; - - /* Local variables */ - integer i__; - doublereal y, z__; - integer try__, lexp; - doublereal oldy; - integer uexp, nbits; - extern doublereal dlamc3_(doublereal *, doublereal *); - doublereal recbas; - integer exbits, expsum; - - -/* -- LAPACK auxiliary routine (version 3.1) -- */ -/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ -/* November 2006 */ - -/* .. Scalar Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* DLAMC5 attempts to compute RMAX, the largest machine floating-point */ -/* number, without overflow. It assumes that EMAX + abs(EMIN) sum */ -/* approximately to a power of 2. It will fail on machines where this */ -/* assumption does not hold, for example, the Cyber 205 (EMIN = -28625, */ -/* EMAX = 28718). It will also fail if the value supplied for EMIN is */ -/* too large (i.e. too close to zero), probably with overflow. */ - -/* Arguments */ -/* ========= */ - -/* BETA (input) INTEGER */ -/* The base of floating-point arithmetic. */ - -/* P (input) INTEGER */ -/* The number of base BETA digits in the mantissa of a */ -/* floating-point value. */ - -/* EMIN (input) INTEGER */ -/* The minimum exponent before (gradual) underflow. */ - -/* IEEE (input) LOGICAL */ -/* A logical flag specifying whether or not the arithmetic */ -/* system is thought to comply with the IEEE standard. */ - -/* EMAX (output) INTEGER */ -/* The largest exponent before overflow */ - -/* RMAX (output) DOUBLE PRECISION */ -/* The largest machine floating-point number. */ - -/* ===================================================================== */ - -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ -/* .. Executable Statements .. */ - -/* First compute LEXP and UEXP, two powers of 2 that bound */ -/* abs(EMIN). We then assume that EMAX + abs(EMIN) will sum */ -/* approximately to the bound that is closest to abs(EMIN). */ -/* (EMAX is the exponent of the required number RMAX). */ - - lexp = 1; - exbits = 1; -L10: - try__ = lexp << 1; - if (try__ <= -(*emin)) { - lexp = try__; - ++exbits; - goto L10; - } - if (lexp == -(*emin)) { - uexp = lexp; - } else { - uexp = try__; - ++exbits; - } - -/* Now -LEXP is less than or equal to EMIN, and -UEXP is greater */ -/* than or equal to EMIN. EXBITS is the number of bits needed to */ -/* store the exponent. */ - - if (uexp + *emin > -lexp - *emin) { - expsum = lexp << 1; - } else { - expsum = uexp << 1; - } - -/* EXPSUM is the exponent range, approximately equal to */ -/* EMAX - EMIN + 1 . */ - - *emax = expsum + *emin - 1; - nbits = exbits + 1 + *p; - -/* NBITS is the total number of bits needed to store a */ -/* floating-point number. */ - - if (nbits % 2 == 1 && *beta == 2) { - -/* Either there are an odd number of bits used to store a */ -/* floating-point number, which is unlikely, or some bits are */ -/* not used in the representation of numbers, which is possible, */ -/* (e.g. Cray machines) or the mantissa has an implicit bit, */ -/* (e.g. IEEE machines, Dec Vax machines), which is perhaps the */ -/* most likely. We have to assume the last alternative. */ -/* If this is true, then we need to reduce EMAX by one because */ -/* there must be some way of representing zero in an implicit-bit */ -/* system. On machines like Cray, we are reducing EMAX by one */ -/* unnecessarily. */ - - --(*emax); - } - - if (*ieee) { - -/* Assume we are on an IEEE machine which reserves one exponent */ -/* for infinity and NaN. */ - - --(*emax); - } - -/* Now create RMAX, the largest machine number, which should */ -/* be equal to (1.0 - BETA**(-P)) * BETA**EMAX . */ - -/* First compute 1.0 - BETA**(-P), being careful that the */ -/* result is less than 1.0 . */ - - recbas = 1. / *beta; - z__ = *beta - 1.; - y = 0.; - i__1 = *p; - for (i__ = 1; i__ <= i__1; ++i__) { - z__ *= recbas; - if (y < 1.) { - oldy = y; - } - y = dlamc3_(&y, &z__); -/* L20: */ - } - if (y >= 1.) { - y = oldy; - } - -/* Now multiply by BETA**EMAX to get RMAX. */ - - i__1 = *emax; - for (i__ = 1; i__ <= i__1; ++i__) { - d__1 = y * *beta; - y = dlamc3_(&d__1, &c_b32); -/* L30: */ - } - - *rmax = y; - return 0; - -/* End of DLAMC5 */ - -} /* dlamc5_ */ - -#endif diff --git a/3rdparty/lapack/dlamch_custom.c b/3rdparty/lapack/dlamch_custom.c new file mode 100644 index 000000000..2f1584b35 --- /dev/null +++ b/3rdparty/lapack/dlamch_custom.c @@ -0,0 +1,58 @@ +#include "clapack.h" +#include +#include + +/* *********************************************************************** */ + +doublereal dlamc3_(doublereal *a, doublereal *b) +{ + /* System generated locals */ + doublereal ret_val; + + + /* -- LAPACK auxiliary routine (version 3.1) -- */ + /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ + /* November 2006 */ + + /* .. Scalar Arguments .. */ + /* .. */ + + /* Purpose */ + /* ======= */ + + /* DLAMC3 is intended to force A and B to be stored prior to doing */ + /* the addition of A and B , for use in situations where optimizers */ + /* might hold one of these in a register. */ + + /* Arguments */ + /* ========= */ + + /* A (input) DOUBLE PRECISION */ + /* B (input) DOUBLE PRECISION */ + /* The values A and B. */ + + /* ===================================================================== */ + + /* .. Executable Statements .. */ + + ret_val = *a + *b; + + return ret_val; + + /* End of DLAMC3 */ + +} /* dlamc3_ */ + + +/* simpler version of dlamch for the case of IEEE754-compliant FPU module by Piotr Luszczek S. + taken from http://www.mail-archive.com/numpy-discussion@lists.sourceforge.net/msg02448.html */ + +#ifndef DBL_DIGITS +#define DBL_DIGITS 53 +#endif + +const doublereal lapack_dlamch_tab[] = +{ + 0, FLT_RADIX, DBL_EPSILON, DBL_MAX_EXP, DBL_MIN_EXP, DBL_DIGITS, DBL_MAX, + DBL_EPSILON*FLT_RADIX, 1, DBL_MIN*(1 + DBL_EPSILON), DBL_MIN +}; diff --git a/3rdparty/lapack/dlartg.c b/3rdparty/lapack/dlartg_custom.c similarity index 80% rename from 3rdparty/lapack/dlartg.c rename to 3rdparty/lapack/dlartg_custom.c index 8ee95934c..a0fdd2d4e 100644 --- a/3rdparty/lapack/dlartg.c +++ b/3rdparty/lapack/dlartg_custom.c @@ -1,15 +1,3 @@ -/* dlartg.f -- translated by f2c (version 20061008). - You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - - http://www.netlib.org/f2c/libf2c.zip -*/ - #include "clapack.h" @@ -20,17 +8,14 @@ integer i__1; doublereal d__1, d__2; - /* Builtin functions */ - double log(doublereal), pow_di(doublereal *, integer *), sqrt(doublereal); - /* Local variables */ integer i__; doublereal f1, g1, eps, scale; integer count; - doublereal safmn2, safmx2; - extern doublereal dlamch_(char *); - doublereal safmin; - + + static doublereal safmn2, safmx2; + static doublereal safmin; + static volatile logical FIRST = TRUE_; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ @@ -97,15 +82,16 @@ /* .. */ /* .. Executable Statements .. */ -/* IF( FIRST ) THEN */ - safmin = dlamch_("S"); - eps = dlamch_("E"); - d__1 = dlamch_("B"); - i__1 = (integer) (log(safmin / eps) / log(dlamch_("B")) / 2.); - safmn2 = pow_di(&d__1, &i__1); - safmx2 = 1. / safmn2; -/* FIRST = .FALSE. */ -/* END IF */ + if( FIRST ) + { + safmin = dlamch_("S"); + eps = dlamch_("E"); + d__1 = dlamch_("B"); + i__1 = (integer) (log(safmin / eps) / log(dlamch_("B")) / 2.); + safmn2 = pow_di(&d__1, &i__1); + safmx2 = 1. / safmn2; + FIRST = FALSE_; + } if (*g == 0.) { *cs = 1.; *sn = 0.; diff --git a/3rdparty/lapack/dlasr.c b/3rdparty/lapack/dlasr_custom.c similarity index 100% rename from 3rdparty/lapack/dlasr.c rename to 3rdparty/lapack/dlasr_custom.c diff --git a/3rdparty/lapack/ilaenv.c b/3rdparty/lapack/ilaenv.c deleted file mode 100644 index d35a8f74d..000000000 --- a/3rdparty/lapack/ilaenv.c +++ /dev/null @@ -1,654 +0,0 @@ -/* ilaenv.f -- translated by f2c (version 20061008). - You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - - http://www.netlib.org/f2c/libf2c.zip -*/ - -#include "clapack.h" - -#include "string.h" - -/* Table of constant values */ - -static integer c__1 = 1; -static real c_b163 = 0.f; -static real c_b164 = 1.f; -static integer c__0 = 0; - -integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, - integer *n2, integer *n3, integer *n4) -{ - /* System generated locals */ - integer ret_val; - - /* Builtin functions */ - /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); - integer s_cmp(char *, char *, ftnlen, ftnlen); - - /* Local variables */ - integer i__; - char c1[1], c2[2], c3[3], c4[2]; - integer ic, nb, iz, nx; - logical cname; - integer nbmin; - logical sname; - extern integer ieeeck_(integer *, real *, real *); - char subnam[6]; - extern integer iparmq_(integer *, char *, char *, integer *, integer *, - integer *, integer *); - - ftnlen name_len, opts_len; - - name_len = (ftnlen)strlen (name__); - opts_len = (ftnlen)strlen (opts); - -/* -- LAPACK auxiliary routine (version 3.2) -- */ -/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ -/* January 2007 */ - -/* .. Scalar Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* ILAENV is called from the LAPACK routines to choose problem-dependent */ -/* parameters for the local environment. See ISPEC for a description of */ -/* the parameters. */ - -/* ILAENV returns an INTEGER */ -/* if ILAENV >= 0: ILAENV returns the value of the parameter specified by ISPEC */ -/* if ILAENV < 0: if ILAENV = -k, the k-th argument had an illegal value. */ - -/* This version provides a set of parameters which should give good, */ -/* but not optimal, performance on many of the currently available */ -/* computers. Users are encouraged to modify this subroutine to set */ -/* the tuning parameters for their particular machine using the option */ -/* and problem size information in the arguments. */ - -/* This routine will not function correctly if it is converted to all */ -/* lower case. Converting it to all upper case is allowed. */ - -/* Arguments */ -/* ========= */ - -/* ISPEC (input) INTEGER */ -/* Specifies the parameter to be returned as the value of */ -/* ILAENV. */ -/* = 1: the optimal blocksize; if this value is 1, an unblocked */ -/* algorithm will give the best performance. */ -/* = 2: the minimum block size for which the block routine */ -/* should be used; if the usable block size is less than */ -/* this value, an unblocked routine should be used. */ -/* = 3: the crossover point (in a block routine, for N less */ -/* than this value, an unblocked routine should be used) */ -/* = 4: the number of shifts, used in the nonsymmetric */ -/* eigenvalue routines (DEPRECATED) */ -/* = 5: the minimum column dimension for blocking to be used; */ -/* rectangular blocks must have dimension at least k by m, */ -/* where k is given by ILAENV(2,...) and m by ILAENV(5,...) */ -/* = 6: the crossover point for the SVD (when reducing an m by n */ -/* matrix to bidiagonal form, if max(m,n)/min(m,n) exceeds */ -/* this value, a QR factorization is used first to reduce */ -/* the matrix to a triangular form.) */ -/* = 7: the number of processors */ -/* = 8: the crossover point for the multishift QR method */ -/* for nonsymmetric eigenvalue problems (DEPRECATED) */ -/* = 9: maximum size of the subproblems at the bottom of the */ -/* computation tree in the divide-and-conquer algorithm */ -/* (used by xGELSD and xGESDD) */ -/* =10: ieee NaN arithmetic can be trusted not to trap */ -/* =11: infinity arithmetic can be trusted not to trap */ -/* 12 <= ISPEC <= 16: */ -/* xHSEQR or one of its subroutines, */ -/* see IPARMQ for detailed explanation */ - -/* NAME (input) CHARACTER*(*) */ -/* The name of the calling subroutine, in either upper case or */ -/* lower case. */ - -/* OPTS (input) CHARACTER*(*) */ -/* The character options to the subroutine NAME, concatenated */ -/* into a single character string. For example, UPLO = 'U', */ -/* TRANS = 'T', and DIAG = 'N' for a triangular routine would */ -/* be specified as OPTS = 'UTN'. */ - -/* N1 (input) INTEGER */ -/* N2 (input) INTEGER */ -/* N3 (input) INTEGER */ -/* N4 (input) INTEGER */ -/* Problem dimensions for the subroutine NAME; these may not all */ -/* be required. */ - -/* Further Details */ -/* =============== */ - -/* The following conventions have been used when calling ILAENV from the */ -/* LAPACK routines: */ -/* 1) OPTS is a concatenation of all of the character options to */ -/* subroutine NAME, in the same order that they appear in the */ -/* argument list for NAME, even if they are not used in determining */ -/* the value of the parameter specified by ISPEC. */ -/* 2) The problem dimensions N1, N2, N3, N4 are specified in the order */ -/* that they appear in the argument list for NAME. N1 is used */ -/* first, N2 second, and so on, and unused problem dimensions are */ -/* passed a value of -1. */ -/* 3) The parameter value returned by ILAENV is checked for validity in */ -/* the calling subroutine. For example, ILAENV is used to retrieve */ -/* the optimal blocksize for STRTRI as follows: */ - -/* NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 ) */ -/* IF( NB.LE.1 ) NB = MAX( 1, N ) */ - -/* ===================================================================== */ - -/* .. Local Scalars .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. Executable Statements .. */ - - switch (*ispec) { - case 1: goto L10; - case 2: goto L10; - case 3: goto L10; - case 4: goto L80; - case 5: goto L90; - case 6: goto L100; - case 7: goto L110; - case 8: goto L120; - case 9: goto L130; - case 10: goto L140; - case 11: goto L150; - case 12: goto L160; - case 13: goto L160; - case 14: goto L160; - case 15: goto L160; - case 16: goto L160; - } - -/* Invalid value for ISPEC */ - - ret_val = -1; - return ret_val; - -L10: - -/* Convert NAME to upper case if the first character is lower case. */ - - ret_val = 1; - s_copy(subnam, name__, (ftnlen)1, name_len); - ic = *(unsigned char *)subnam; - iz = 'Z'; - if (iz == 90 || iz == 122) { - -/* ASCII character set */ - - if (ic >= 97 && ic <= 122) { - *(unsigned char *)subnam = (char) (ic - 32); - for (i__ = 2; i__ <= 6; ++i__) { - ic = *(unsigned char *)&subnam[i__ - 1]; - if (ic >= 97 && ic <= 122) { - *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32); - } -/* L20: */ - } - } - - } else if (iz == 233 || iz == 169) { - -/* EBCDIC character set */ - - if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >= 162 && - ic <= 169) { - *(unsigned char *)subnam = (char) (ic + 64); - for (i__ = 2; i__ <= 6; ++i__) { - ic = *(unsigned char *)&subnam[i__ - 1]; - if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >= - 162 && ic <= 169) { - *(unsigned char *)&subnam[i__ - 1] = (char) (ic + 64); - } -/* L30: */ - } - } - - } else if (iz == 218 || iz == 250) { - -/* Prime machines: ASCII+128 */ - - if (ic >= 225 && ic <= 250) { - *(unsigned char *)subnam = (char) (ic - 32); - for (i__ = 2; i__ <= 6; ++i__) { - ic = *(unsigned char *)&subnam[i__ - 1]; - if (ic >= 225 && ic <= 250) { - *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32); - } -/* L40: */ - } - } - } - - *(unsigned char *)c1 = *(unsigned char *)subnam; - sname = *(unsigned char *)c1 == 'S' || *(unsigned char *)c1 == 'D'; - cname = *(unsigned char *)c1 == 'C' || *(unsigned char *)c1 == 'Z'; - if (! (cname || sname)) { - return ret_val; - } - s_copy(c2, subnam + 1, (ftnlen)1, (ftnlen)2); - s_copy(c3, subnam + 3, (ftnlen)1, (ftnlen)3); - s_copy(c4, c3 + 1, (ftnlen)1, (ftnlen)2); - - switch (*ispec) { - case 1: goto L50; - case 2: goto L60; - case 3: goto L70; - } - -L50: - -/* ISPEC = 1: block size */ - -/* In these examples, separate code is provided for setting NB for */ -/* real and complex. We assume that NB will take the same value in */ -/* single or double precision. */ - - nb = 1; - - if (s_cmp(c2, "GE", (ftnlen)1, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) { - if (sname) { - nb = 64; - } else { - nb = 64; - } - } else if (s_cmp(c3, "QRF", (ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, - "RQF", (ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen) - 1, (ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)1, (ftnlen)3) - == 0) { - if (sname) { - nb = 32; - } else { - nb = 32; - } - } else if (s_cmp(c3, "HRD", (ftnlen)1, (ftnlen)3) == 0) { - if (sname) { - nb = 32; - } else { - nb = 32; - } - } else if (s_cmp(c3, "BRD", (ftnlen)1, (ftnlen)3) == 0) { - if (sname) { - nb = 32; - } else { - nb = 32; - } - } else if (s_cmp(c3, "TRI", (ftnlen)1, (ftnlen)3) == 0) { - if (sname) { - nb = 64; - } else { - nb = 64; - } - } - } else if (s_cmp(c2, "PO", (ftnlen)1, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) { - if (sname) { - nb = 64; - } else { - nb = 64; - } - } - } else if (s_cmp(c2, "SY", (ftnlen)1, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) { - if (sname) { - nb = 64; - } else { - nb = 64; - } - } else if (sname && s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) { - nb = 32; - } else if (sname && s_cmp(c3, "GST", (ftnlen)1, (ftnlen)3) == 0) { - nb = 64; - } - } else if (cname && s_cmp(c2, "HE", (ftnlen)1, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) { - nb = 64; - } else if (s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) { - nb = 32; - } else if (s_cmp(c3, "GST", (ftnlen)1, (ftnlen)3) == 0) { - nb = 64; - } - } else if (sname && s_cmp(c2, "OR", (ftnlen)1, (ftnlen)2) == 0) { - if (*(unsigned char *)c3 == 'G') { - if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", - (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( - ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == - 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( - c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( - ftnlen)1, (ftnlen)2) == 0) { - nb = 32; - } - } else if (*(unsigned char *)c3 == 'M') { - if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", - (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( - ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == - 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( - c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( - ftnlen)1, (ftnlen)2) == 0) { - nb = 32; - } - } - } else if (cname && s_cmp(c2, "UN", (ftnlen)1, (ftnlen)2) == 0) { - if (*(unsigned char *)c3 == 'G') { - if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", - (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( - ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == - 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( - c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( - ftnlen)1, (ftnlen)2) == 0) { - nb = 32; - } - } else if (*(unsigned char *)c3 == 'M') { - if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", - (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( - ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == - 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( - c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( - ftnlen)1, (ftnlen)2) == 0) { - nb = 32; - } - } - } else if (s_cmp(c2, "GB", (ftnlen)1, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) { - if (sname) { - if (*n4 <= 64) { - nb = 1; - } else { - nb = 32; - } - } else { - if (*n4 <= 64) { - nb = 1; - } else { - nb = 32; - } - } - } - } else if (s_cmp(c2, "PB", (ftnlen)1, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) { - if (sname) { - if (*n2 <= 64) { - nb = 1; - } else { - nb = 32; - } - } else { - if (*n2 <= 64) { - nb = 1; - } else { - nb = 32; - } - } - } - } else if (s_cmp(c2, "TR", (ftnlen)1, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRI", (ftnlen)1, (ftnlen)3) == 0) { - if (sname) { - nb = 64; - } else { - nb = 64; - } - } - } else if (s_cmp(c2, "LA", (ftnlen)1, (ftnlen)2) == 0) { - if (s_cmp(c3, "UUM", (ftnlen)1, (ftnlen)3) == 0) { - if (sname) { - nb = 64; - } else { - nb = 64; - } - } - } else if (sname && s_cmp(c2, "ST", (ftnlen)1, (ftnlen)2) == 0) { - if (s_cmp(c3, "EBZ", (ftnlen)1, (ftnlen)3) == 0) { - nb = 1; - } - } - ret_val = nb; - return ret_val; - -L60: - -/* ISPEC = 2: minimum block size */ - - nbmin = 2; - if (s_cmp(c2, "GE", (ftnlen)1, (ftnlen)2) == 0) { - if (s_cmp(c3, "QRF", (ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "RQF", ( - ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)1, ( - ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)1, (ftnlen)3) == 0) - { - if (sname) { - nbmin = 2; - } else { - nbmin = 2; - } - } else if (s_cmp(c3, "HRD", (ftnlen)1, (ftnlen)3) == 0) { - if (sname) { - nbmin = 2; - } else { - nbmin = 2; - } - } else if (s_cmp(c3, "BRD", (ftnlen)1, (ftnlen)3) == 0) { - if (sname) { - nbmin = 2; - } else { - nbmin = 2; - } - } else if (s_cmp(c3, "TRI", (ftnlen)1, (ftnlen)3) == 0) { - if (sname) { - nbmin = 2; - } else { - nbmin = 2; - } - } - } else if (s_cmp(c2, "SY", (ftnlen)1, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) { - if (sname) { - nbmin = 8; - } else { - nbmin = 8; - } - } else if (sname && s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) { - nbmin = 2; - } - } else if (cname && s_cmp(c2, "HE", (ftnlen)1, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) { - nbmin = 2; - } - } else if (sname && s_cmp(c2, "OR", (ftnlen)1, (ftnlen)2) == 0) { - if (*(unsigned char *)c3 == 'G') { - if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", - (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( - ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == - 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( - c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( - ftnlen)1, (ftnlen)2) == 0) { - nbmin = 2; - } - } else if (*(unsigned char *)c3 == 'M') { - if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", - (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( - ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == - 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( - c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( - ftnlen)1, (ftnlen)2) == 0) { - nbmin = 2; - } - } - } else if (cname && s_cmp(c2, "UN", (ftnlen)1, (ftnlen)2) == 0) { - if (*(unsigned char *)c3 == 'G') { - if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", - (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( - ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == - 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( - c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( - ftnlen)1, (ftnlen)2) == 0) { - nbmin = 2; - } - } else if (*(unsigned char *)c3 == 'M') { - if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", - (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( - ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == - 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( - c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( - ftnlen)1, (ftnlen)2) == 0) { - nbmin = 2; - } - } - } - ret_val = nbmin; - return ret_val; - -L70: - -/* ISPEC = 3: crossover point */ - - nx = 0; - if (s_cmp(c2, "GE", (ftnlen)1, (ftnlen)2) == 0) { - if (s_cmp(c3, "QRF", (ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "RQF", ( - ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)1, ( - ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)1, (ftnlen)3) == 0) - { - if (sname) { - nx = 128; - } else { - nx = 128; - } - } else if (s_cmp(c3, "HRD", (ftnlen)1, (ftnlen)3) == 0) { - if (sname) { - nx = 128; - } else { - nx = 128; - } - } else if (s_cmp(c3, "BRD", (ftnlen)1, (ftnlen)3) == 0) { - if (sname) { - nx = 128; - } else { - nx = 128; - } - } - } else if (s_cmp(c2, "SY", (ftnlen)1, (ftnlen)2) == 0) { - if (sname && s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) { - nx = 32; - } - } else if (cname && s_cmp(c2, "HE", (ftnlen)1, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) { - nx = 32; - } - } else if (sname && s_cmp(c2, "OR", (ftnlen)1, (ftnlen)2) == 0) { - if (*(unsigned char *)c3 == 'G') { - if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", - (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( - ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == - 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( - c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( - ftnlen)1, (ftnlen)2) == 0) { - nx = 128; - } - } - } else if (cname && s_cmp(c2, "UN", (ftnlen)1, (ftnlen)2) == 0) { - if (*(unsigned char *)c3 == 'G') { - if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", - (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( - ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == - 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( - c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( - ftnlen)1, (ftnlen)2) == 0) { - nx = 128; - } - } - } - ret_val = nx; - return ret_val; - -L80: - -/* ISPEC = 4: number of shifts (used by xHSEQR) */ - - ret_val = 6; - return ret_val; - -L90: - -/* ISPEC = 5: minimum column dimension (not used) */ - - ret_val = 2; - return ret_val; - -L100: - -/* ISPEC = 6: crossover point for SVD (used by xGELSS and xGESVD) */ - - ret_val = (integer) ((real) min(*n1,*n2) * 1.6f); - return ret_val; - -L110: - -/* ISPEC = 7: number of processors (not used) */ - - ret_val = 1; - return ret_val; - -L120: - -/* ISPEC = 8: crossover point for multishift (used by xHSEQR) */ - - ret_val = 50; - return ret_val; - -L130: - -/* ISPEC = 9: maximum size of the subproblems at the bottom of the */ -/* computation tree in the divide-and-conquer algorithm */ -/* (used by xGELSD and xGESDD) */ - - ret_val = 25; - return ret_val; - -L140: - -/* ISPEC = 10: ieee NaN arithmetic can be trusted not to trap */ - -/* ILAENV = 0 */ - ret_val = 1; - if (ret_val == 1) { - ret_val = ieeeck_(&c__1, &c_b163, &c_b164); - } - return ret_val; - -L150: - -/* ISPEC = 11: infinity arithmetic can be trusted not to trap */ - -/* ILAENV = 0 */ - ret_val = 1; - if (ret_val == 1) { - ret_val = ieeeck_(&c__0, &c_b163, &c_b164); - } - return ret_val; - -L160: - -/* 12 <= ISPEC <= 16: xHSEQR or one of its subroutines. */ - - ret_val = iparmq_(ispec, name__, opts, n1, n2, n3, n4) - ; - return ret_val; - -/* End of ILAENV */ - -} /* ilaenv_ */ diff --git a/3rdparty/lapack/ilaenv_custom.c b/3rdparty/lapack/ilaenv_custom.c new file mode 100644 index 000000000..e58833fb3 --- /dev/null +++ b/3rdparty/lapack/ilaenv_custom.c @@ -0,0 +1,191 @@ +/* ilaenv.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "clapack.h" + +#include "string.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static real c_b163 = 0.f; +static real c_b164 = 1.f; +static integer c__0 = 0; + +integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, + integer *n2, integer *n3, integer *n4) +{ +/* -- LAPACK auxiliary routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* January 2007 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* ILAENV is called from the LAPACK routines to choose problem-dependent */ +/* parameters for the local environment. See ISPEC for a description of */ +/* the parameters. */ + +/* ILAENV returns an INTEGER */ +/* if ILAENV >= 0: ILAENV returns the value of the parameter specified by ISPEC */ +/* if ILAENV < 0: if ILAENV = -k, the k-th argument had an illegal value. */ + +/* This version provides a set of parameters which should give good, */ +/* but not optimal, performance on many of the currently available */ +/* computers. Users are encouraged to modify this subroutine to set */ +/* the tuning parameters for their particular machine using the option */ +/* and problem size information in the arguments. */ + +/* This routine will not function correctly if it is converted to all */ +/* lower case. Converting it to all upper case is allowed. */ + +/* Arguments */ +/* ========= */ + +/* ISPEC (input) INTEGER */ +/* Specifies the parameter to be returned as the value of */ +/* ILAENV. */ +/* = 1: the optimal blocksize; if this value is 1, an unblocked */ +/* algorithm will give the best performance. */ +/* = 2: the minimum block size for which the block routine */ +/* should be used; if the usable block size is less than */ +/* this value, an unblocked routine should be used. */ +/* = 3: the crossover point (in a block routine, for N less */ +/* than this value, an unblocked routine should be used) */ +/* = 4: the number of shifts, used in the nonsymmetric */ +/* eigenvalue routines (DEPRECATED) */ +/* = 5: the minimum column dimension for blocking to be used; */ +/* rectangular blocks must have dimension at least k by m, */ +/* where k is given by ILAENV(2,...) and m by ILAENV(5,...) */ +/* = 6: the crossover point for the SVD (when reducing an m by n */ +/* matrix to bidiagonal form, if max(m,n)/min(m,n) exceeds */ +/* this value, a QR factorization is used first to reduce */ +/* the matrix to a triangular form.) */ +/* = 7: the number of processors */ +/* = 8: the crossover point for the multishift QR method */ +/* for nonsymmetric eigenvalue problems (DEPRECATED) */ +/* = 9: maximum size of the subproblems at the bottom of the */ +/* computation tree in the divide-and-conquer algorithm */ +/* (used by xGELSD and xGESDD) */ +/* =10: ieee NaN arithmetic can be trusted not to trap */ +/* =11: infinity arithmetic can be trusted not to trap */ +/* 12 <= ISPEC <= 16: */ +/* xHSEQR or one of its subroutines, */ +/* see IPARMQ for detailed explanation */ + +/* NAME (input) CHARACTER*(*) */ +/* The name of the calling subroutine, in either upper case or */ +/* lower case. */ + +/* OPTS (input) CHARACTER*(*) */ +/* The character options to the subroutine NAME, concatenated */ +/* into a single character string. For example, UPLO = 'U', */ +/* TRANS = 'T', and DIAG = 'N' for a triangular routine would */ +/* be specified as OPTS = 'UTN'. */ + +/* N1 (input) INTEGER */ +/* N2 (input) INTEGER */ +/* N3 (input) INTEGER */ +/* N4 (input) INTEGER */ +/* Problem dimensions for the subroutine NAME; these may not all */ +/* be required. */ + +/* Further Details */ +/* =============== */ + +/* The following conventions have been used when calling ILAENV from the */ +/* LAPACK routines: */ +/* 1) OPTS is a concatenation of all of the character options to */ +/* subroutine NAME, in the same order that they appear in the */ +/* argument list for NAME, even if they are not used in determining */ +/* the value of the parameter specified by ISPEC. */ +/* 2) The problem dimensions N1, N2, N3, N4 are specified in the order */ +/* that they appear in the argument list for NAME. N1 is used */ +/* first, N2 second, and so on, and unused problem dimensions are */ +/* passed a value of -1. */ +/* 3) The parameter value returned by ILAENV is checked for validity in */ +/* the calling subroutine. For example, ILAENV is used to retrieve */ +/* the optimal blocksize for STRTRI as follows: */ + +/* NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 ) */ +/* IF( NB.LE.1 ) NB = MAX( 1, N ) */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + switch (*ispec) { + case 1: + /* ISPEC = 1: block size */ + + /* In these examples, separate code is provided for setting NB for */ + /* real and complex. We assume that NB will take the same value in */ + /* single or double precision. */ + return 1; + case 2: + /* ISPEC = 2: minimum block size */ + return 2; + case 3: + /* ISPEC = 3: crossover point */ + return 3; + case 4: + /* ISPEC = 4: number of shifts (used by xHSEQR) */ + return 6; + case 5: + /* ISPEC = 5: minimum column dimension (not used) */ + return 2; + case 6: + /* ISPEC = 6: crossover point for SVD (used by xGELSS and xGESVD) */ + return (integer) ((real) min(*n1,*n2) * 1.6f); + case 7: + /* ISPEC = 7: number of processors (not used) */ + return 1; + case 8: + /* ISPEC = 8: crossover point for multishift (used by xHSEQR) */ + return 50; + case 9: + /* ISPEC = 9: maximum size of the subproblems at the bottom of the */ + /* computation tree in the divide-and-conquer algorithm */ + /* (used by xGELSD and xGESDD) */ + return 25; + case 10: + /* ISPEC = 10: ieee NaN arithmetic can be trusted not to trap */ + return ieeeck_(&c__1, &c_b163, &c_b164); + case 11: + /* ISPEC = 11: infinity arithmetic can be trusted not to trap */ + return ieeeck_(&c__0, &c_b163, &c_b164); + case 12: + case 13: + case 14: + case 15: + case 16: + /* 12 <= ISPEC <= 16: xHSEQR or one of its subroutines. */ + return iparmq_(ispec, name__, opts, n1, n2, n3, n4); + default: + break; + } + + /* Invalid value for ISPEC */ + return -1; + +/* End of ILAENV */ + +} /* ilaenv_ */ diff --git a/3rdparty/lapack/sgemv.c b/3rdparty/lapack/sgemv.c deleted file mode 100644 index 857f0131f..000000000 --- a/3rdparty/lapack/sgemv.c +++ /dev/null @@ -1,312 +0,0 @@ -/* sgemv.f -- translated by f2c (version 20061008). - You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - - http://www.netlib.org/f2c/libf2c.zip -*/ - -#include "clapack.h" - - -/* Subroutine */ int sgemv_(char *trans, integer *m, integer *n, real *alpha, - real *a, integer *lda, real *x, integer *incx, real *beta, real *y, - integer *incy) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - - /* Local variables */ - integer i__, j, ix, iy, jx, jy, kx, ky, info; - real temp; - integer lenx, leny; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* SGEMV performs one of the matrix-vector operations */ - -/* y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, */ - -/* where alpha and beta are scalars, x and y are vectors and A is an */ -/* m by n matrix. */ - -/* Arguments */ -/* ========== */ - -/* TRANS - CHARACTER*1. */ -/* On entry, TRANS specifies the operation to be performed as */ -/* follows: */ - -/* TRANS = 'N' or 'n' y := alpha*A*x + beta*y. */ - -/* TRANS = 'T' or 't' y := alpha*A'*x + beta*y. */ - -/* TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. */ - -/* Unchanged on exit. */ - -/* M - INTEGER. */ -/* On entry, M specifies the number of rows of the matrix A. */ -/* M must be at least zero. */ -/* Unchanged on exit. */ - -/* N - INTEGER. */ -/* On entry, N specifies the number of columns of the matrix A. */ -/* N must be at least zero. */ -/* Unchanged on exit. */ - -/* ALPHA - REAL . */ -/* On entry, ALPHA specifies the scalar alpha. */ -/* Unchanged on exit. */ - -/* A - REAL array of DIMENSION ( LDA, n ). */ -/* Before entry, the leading m by n part of the array A must */ -/* contain the matrix of coefficients. */ -/* Unchanged on exit. */ - -/* LDA - INTEGER. */ -/* On entry, LDA specifies the first dimension of A as declared */ -/* in the calling (sub) program. LDA must be at least */ -/* max( 1, m ). */ -/* Unchanged on exit. */ - -/* X - REAL array of DIMENSION at least */ -/* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */ -/* and at least */ -/* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */ -/* Before entry, the incremented array X must contain the */ -/* vector x. */ -/* Unchanged on exit. */ - -/* INCX - INTEGER. */ -/* On entry, INCX specifies the increment for the elements of */ -/* X. INCX must not be zero. */ -/* Unchanged on exit. */ - -/* BETA - REAL . */ -/* On entry, BETA specifies the scalar beta. When BETA is */ -/* supplied as zero then Y need not be set on input. */ -/* Unchanged on exit. */ - -/* Y - REAL array of DIMENSION at least */ -/* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */ -/* and at least */ -/* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */ -/* Before entry with BETA non-zero, the incremented array Y */ -/* must contain the vector y. On exit, Y is overwritten by the */ -/* updated vector y. */ - -/* INCY - INTEGER. */ -/* On entry, INCY specifies the increment for the elements of */ -/* Y. INCY must not be zero. */ -/* Unchanged on exit. */ - - -/* Level 2 Blas routine. */ - -/* -- Written on 22-October-1986. */ -/* Jack Dongarra, Argonne National Lab. */ -/* Jeremy Du Croz, Nag Central Office. */ -/* Sven Hammarling, Nag Central Office. */ -/* Richard Hanson, Sandia National Labs. */ - - -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. External Subroutines .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ - -/* Test the input parameters. */ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; - --y; - - /* Function Body */ - info = 0; - if (! lsame_(trans, "N") && ! lsame_(trans, "T") && ! lsame_(trans, "C") - ) { - info = 1; - } else if (*m < 0) { - info = 2; - } else if (*n < 0) { - info = 3; - } else if (*lda < max(1,*m)) { - info = 6; - } else if (*incx == 0) { - info = 8; - } else if (*incy == 0) { - info = 11; - } - if (info != 0) { - xerbla_("SGEMV ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*m == 0 || *n == 0 || *alpha == 0.f && *beta == 1.f) { - return 0; - } - -/* Set LENX and LENY, the lengths of the vectors x and y, and set */ -/* up the start points in X and Y. */ - - if (lsame_(trans, "N")) { - lenx = *n; - leny = *m; - } else { - lenx = *m; - leny = *n; - } - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (lenx - 1) * *incx; - } - if (*incy > 0) { - ky = 1; - } else { - ky = 1 - (leny - 1) * *incy; - } - -/* Start the operations. In this version the elements of A are */ -/* accessed sequentially with one pass through A. */ - -/* First form y := beta*y. */ - - if (*beta != 1.f) { - if (*incy == 1) { - if (*beta == 0.f) { - i__1 = leny; - for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = 0.f; -/* L10: */ - } - } else { - i__1 = leny; - for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = *beta * y[i__]; -/* L20: */ - } - } - } else { - iy = ky; - if (*beta == 0.f) { - i__1 = leny; - for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = 0.f; - iy += *incy; -/* L30: */ - } - } else { - i__1 = leny; - for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = *beta * y[iy]; - iy += *incy; -/* L40: */ - } - } - } - } - if (*alpha == 0.f) { - return 0; - } - if (lsame_(trans, "N")) { - -/* Form y := alpha*A*x + y. */ - - jx = kx; - if (*incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (x[jx] != 0.f) { - temp = *alpha * x[jx]; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - y[i__] += temp * a[i__ + j * a_dim1]; -/* L50: */ - } - } - jx += *incx; -/* L60: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (x[jx] != 0.f) { - temp = *alpha * x[jx]; - iy = ky; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - y[iy] += temp * a[i__ + j * a_dim1]; - iy += *incy; -/* L70: */ - } - } - jx += *incx; -/* L80: */ - } - } - } else { - -/* Form y := alpha*A'*x + y. */ - - jy = ky; - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp = 0.f; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp += a[i__ + j * a_dim1] * x[i__]; -/* L90: */ - } - y[jy] += *alpha * temp; - jy += *incy; -/* L100: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp = 0.f; - ix = kx; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp += a[i__ + j * a_dim1] * x[ix]; - ix += *incx; -/* L110: */ - } - y[jy] += *alpha * temp; - jy += *incy; -/* L120: */ - } - } - } - - return 0; - -/* End of SGEMV . */ - -} /* sgemv_ */ diff --git a/3rdparty/lapack/sgemv_custom.c b/3rdparty/lapack/sgemv_custom.c new file mode 100644 index 000000000..0cb7a9750 --- /dev/null +++ b/3rdparty/lapack/sgemv_custom.c @@ -0,0 +1,204 @@ +#include "clapack.h" +#include + +/* Subroutine */ int sgemv_(char *_trans, integer *_m, integer *_n, real *_alpha, + real *a, integer *_lda, real *x, integer *_incx, real *_beta, real *y, + integer *_incy) +{ + + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ + + /* Purpose */ + /* ======= */ + + /* SGEMV performs one of the matrix-vector operations */ + + /* y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, */ + + /* where alpha and beta are scalars, x and y are vectors and A is an */ + /* m by n matrix. */ + + /* Arguments */ + /* ========== */ + + /* TRANS - CHARACTER*1. */ + /* On entry, TRANS specifies the operation to be performed as */ + /* follows: */ + + /* TRANS = 'N' or 'n' y := alpha*A*x + beta*y. */ + + /* TRANS = 'T' or 't' y := alpha*A'*x + beta*y. */ + + /* TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. */ + + /* Unchanged on exit. */ + + /* M - INTEGER. */ + /* On entry, M specifies the number of rows of the matrix A. */ + /* M must be at least zero. */ + /* Unchanged on exit. */ + + /* N - INTEGER. */ + /* On entry, N specifies the number of columns of the matrix A. */ + /* N must be at least zero. */ + /* Unchanged on exit. */ + + /* ALPHA - REAL . */ + /* On entry, ALPHA specifies the scalar alpha. */ + /* Unchanged on exit. */ + + /* A - REAL array of DIMENSION ( LDA, n ). */ + /* Before entry, the leading m by n part of the array A must */ + /* contain the matrix of coefficients. */ + /* Unchanged on exit. */ + + /* LDA - INTEGER. */ + /* On entry, LDA specifies the first dimension of A as declared */ + /* in the calling (sub) program. LDA must be at least */ + /* max( 1, m ). */ + /* Unchanged on exit. */ + + /* X - REAL array of DIMENSION at least */ + /* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */ + /* and at least */ + /* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */ + /* Before entry, the incremented array X must contain the */ + /* vector x. */ + /* Unchanged on exit. */ + + /* INCX - INTEGER. */ + /* On entry, INCX specifies the increment for the elements of */ + /* X. INCX must not be zero. */ + /* Unchanged on exit. */ + + /* BETA - REAL . */ + /* On entry, BETA specifies the scalar beta. When BETA is */ + /* supplied as zero then Y need not be set on input. */ + /* Unchanged on exit. */ + + /* Y - REAL array of DIMENSION at least */ + /* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */ + /* and at least */ + /* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */ + /* Before entry with BETA non-zero, the incremented array Y */ + /* must contain the vector y. On exit, Y is overwritten by the */ + /* updated vector y. */ + + /* INCY - INTEGER. */ + /* On entry, INCY specifies the increment for the elements of */ + /* Y. INCY must not be zero. */ + /* Unchanged on exit. */ + + + /* Level 2 Blas routine. */ + + /* -- Written on 22-October-1986. */ + /* Jack Dongarra, Argonne National Lab. */ + /* Jeremy Du Croz, Nag Central Office. */ + /* Sven Hammarling, Nag Central Office. */ + /* Richard Hanson, Sandia National Labs. */ + + /* Test the input parameters. */ + + /* Function Body */ + char trans = lapack_toupper(_trans[0]); + integer i, j, m = *_m, n = *_n, lda = *_lda, incx = *_incx, incy = *_incy; + integer leny = trans == 'N' ? m : n, lenx = trans == 'N' ? n : m; + real alpha = *_alpha, beta = *_beta; + + integer info = 0; + if (trans != 'N' && trans != 'T' && trans != 'C') + info = 1; + else if (m < 0) + info = 2; + else if (n < 0) + info = 3; + else if (lda < max(1,m)) + info = 6; + else if (incx == 0) + info = 8; + else if (incy == 0) + info = 11; + + if (info != 0) + { + xerbla_("SGEMV ", &info); + return 0; + } + + if( incy < 0 ) + y -= incy*(leny - 1); + if( incx < 0 ) + x -= incx*(lenx - 1); + + /* Start the operations. In this version the elements of A are */ + /* accessed sequentially with one pass through A. */ + + if( beta != 1.f ) + { + if( incy == 1 ) + { + if( beta == 0.f ) + for( i = 0; i < leny; i++ ) + y[i] = 0.f; + else + for( i = 0; i < leny; i++ ) + y[i] *= beta; + } + else + { + if( beta == 0.f ) + for( i = 0; i < leny; i++ ) + y[i*incy] = 0.f; + else + for( i = 0; i < leny; i++ ) + y[i*incy] *= beta; + } + } + + if( alpha == 0.f ) + ; + else if( trans == 'N' ) + { + for( i = 0; i < n; i++, a += lda ) + { + real s = x[i*incx]; + if( s == 0.f ) + continue; + s *= alpha; + + for( j = 0; j <= m - 4; j += 4 ) + { + real t0 = y[j] + s*a[j]; + real t1 = y[j+1] + s*a[j+1]; + y[j] = t0; y[j+1] = t1; + t0 = y[j+2] + s*a[j+2]; + t1 = y[j+3] + s*a[j+3]; + y[j+2] = t0; y[j+3] = t1; + } + + for( ; j < m; j++ ) + y[j] += s*a[j]; + } + } + else + { + for( i = 0; i < n; i++, a += lda ) + { + real s = 0; + for( j = 0; j <= m - 4; j += 4 ) + s += x[j]*a[j] + x[j+1]*a[j+1] + x[j+2]*a[j+2] + x[j+3]*a[j+3]; + for( ; j < m; j++ ) + s += x[j]*a[j]; + y[i*incy] += alpha*s; + } + } + + return 0; + + /* End of SGEMV . */ + +} /* sgemv_ */ diff --git a/3rdparty/lapack/sger.c b/3rdparty/lapack/sger_custom.c similarity index 52% rename from 3rdparty/lapack/sger.c rename to 3rdparty/lapack/sger_custom.c index 7c5594260..b36487235 100644 --- a/3rdparty/lapack/sger.c +++ b/3rdparty/lapack/sger_custom.c @@ -1,28 +1,9 @@ -/* sger.f -- translated by f2c (version 20061008). - You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - - http://www.netlib.org/f2c/libf2c.zip -*/ - #include "clapack.h" - -/* Subroutine */ int sger_(integer *m, integer *n, real *alpha, real *x, - integer *incx, real *y, integer *incy, real *a, integer *lda) +/* Subroutine */ int sger_(integer *_m, integer *_n, real *_alpha, + real *x, integer *_incx, real *y, integer *_incy, + real *a, integer *_lda) { - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - - /* Local variables */ - integer i__, j, ix, jy, kx, info; - real temp; - extern /* Subroutine */ int xerbla_(char *, integer *); /* .. Scalar Arguments .. */ /* .. */ @@ -52,11 +33,11 @@ /* N must be at least zero. */ /* Unchanged on exit. */ -/* ALPHA - REAL . */ +/* ALPHA - SINGLE PRECISION. */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ -/* X - REAL array of dimension at least */ +/* X - SINGLE PRECISION array of dimension at least */ /* ( 1 + ( m - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the m */ /* element vector x. */ @@ -67,7 +48,7 @@ /* X. INCX must not be zero. */ /* Unchanged on exit. */ -/* Y - REAL array of dimension at least */ +/* Y - SINGLE PRECISION array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCY ) ). */ /* Before entry, the incremented array Y must contain the n */ /* element vector y. */ @@ -78,7 +59,7 @@ /* Y. INCY must not be zero. */ /* Unchanged on exit. */ -/* A - REAL array of DIMENSION ( LDA, n ). */ +/* A - SINGLE PRECISION array of DIMENSION ( LDA, n ). */ /* Before entry, the leading m by n part of the array A must */ /* contain the matrix of coefficients. On exit, A is */ /* overwritten by the updated matrix. */ @@ -110,80 +91,70 @@ /* Test the input parameters. */ - /* Parameter adjustments */ - --x; - --y; - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - /* Function Body */ - info = 0; - if (*m < 0) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*incx == 0) { - info = 5; - } else if (*incy == 0) { - info = 7; - } else if (*lda < max(1,*m)) { - info = 9; - } - if (info != 0) { - xerbla_("SGER ", &info); - return 0; + integer i, j, m = *_m, n = *_n, incx = *_incx, incy = *_incy, lda = *_lda; + real alpha = *_alpha; + integer info = 0; + + if (m < 0) + info = 1; + else if (n < 0) + info = 2; + else if (incx == 0) + info = 5; + else if (incy == 0) + info = 7; + else if (lda < max(1,m)) + info = 9; + + if (info != 0) + { + xerbla_("SGER ", &info); + return 0; } -/* Quick return if possible. */ + if (incx < 0) + x -= (m-1)*incx; + if (incy < 0) + y -= (n-1)*incy; - if (*m == 0 || *n == 0 || *alpha == 0.f) { - return 0; + /* Start the operations. In this version the elements of A are */ + /* accessed sequentially with one pass through A. */ + + if( alpha == 0 ) + ; + else if( incx == 1 ) + { + for( j = 0; j < n; j++, a += lda ) + { + real s = y[j*incy]; + if( s == 0 ) + continue; + s *= alpha; + + for( i = 0; i <= m - 2; i += 2 ) + { + real t0 = a[i] + x[i]*s; + real t1 = a[i+1] + x[i+1]*s; + a[i] = t0; a[i+1] = t1; + } + + for( ; i < m; i++ ) + a[i] += x[i]*s; + } } - -/* Start the operations. In this version the elements of A are */ -/* accessed sequentially with one pass through A. */ - - if (*incy > 0) { - jy = 1; - } else { - jy = 1 - (*n - 1) * *incy; - } - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (y[jy] != 0.f) { - temp = *alpha * y[jy]; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] += x[i__] * temp; -/* L10: */ - } - } - jy += *incy; -/* L20: */ - } - } else { - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (*m - 1) * *incx; - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (y[jy] != 0.f) { - temp = *alpha * y[jy]; - ix = kx; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] += x[ix] * temp; - ix += *incx; -/* L30: */ - } - } - jy += *incy; -/* L40: */ - } + else + { + for( j = 0; j < n; j++, a += lda ) + { + real s = y[j*incy]; + if( s == 0 ) + continue; + s *= alpha; + + for( i = 0; i < m; i++ ) + a[i] += x[i*incx]*s; + } } return 0; diff --git a/3rdparty/lapack/slamch.c b/3rdparty/lapack/slamch.c deleted file mode 100644 index 78626fe08..000000000 --- a/3rdparty/lapack/slamch.c +++ /dev/null @@ -1,1045 +0,0 @@ -#include "clapack.h" -#include -#include - -/* *********************************************************************** */ - -doublereal slamc3_(real *a, real *b) -{ - /* System generated locals */ - real ret_val; - - - /* -- LAPACK auxiliary routine (version 3.1) -- */ - /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ - /* November 2006 */ - - /* .. Scalar Arguments .. */ - /* .. */ - - /* Purpose */ - /* ======= */ - - /* SLAMC3 is intended to force A and B to be stored prior to doing */ - /* the addition of A and B , for use in situations where optimizers */ - /* might hold one of these in a register. */ - - /* Arguments */ - /* ========= */ - - /* A (input) REAL */ - /* B (input) REAL */ - /* The values A and B. */ - - /* ===================================================================== */ - - /* .. Executable Statements .. */ - - ret_val = *a + *b; - - return ret_val; - - /* End of SLAMC3 */ - -} /* slamc3_ */ - - -#if 1 - -/* simpler version of dlamch for the case of IEEE754-compliant FPU module by Piotr Luszczek S. - taken from http://www.mail-archive.com/numpy-discussion@lists.sourceforge.net/msg02448.html */ - -#ifndef FLT_DIGITS -#define FLT_DIGITS 24 -#endif - -doublereal -slamch_(char *cmach) { - char ch = cmach[0]; - float eps=FLT_EPSILON, sfmin, small; - - if ('B' == ch || 'b' == ch) { - return FLT_RADIX; - } else if ('E' == ch || 'e' == ch) { - return eps; - } else if ('L' == ch || 'l' == ch) { - return FLT_MAX_EXP; - } else if ('M' == ch || 'm' == ch) { - return FLT_MIN_EXP; - } else if ('N' == ch || 'n' == ch) { - return FLT_DIGITS; - } else if ('O' == ch || 'o' == ch) { - return FLT_MAX; - } else if ('P' == ch || 'p' == ch) { - return eps * FLT_RADIX; - } else if ('R' == ch || 'r' == ch) { - return FLT_ROUNDS < 2; - } else if ('S' == ch || 's' == ch) { - /* Use SMALL plus a bit, to avoid the possibility of rounding causing overflow - when computing 1/sfmin. */ - sfmin = FLT_MIN; - small = (float)(2. / FLT_MAX); - if (small <= sfmin) small = sfmin * (1 + eps); - return small; - } else if ('U' == ch || 'u' == ch) { - return FLT_MIN; - } - - return 0; -} - -#else - -/* Table of constant values */ - -static integer c__1 = 1; -static real c_b32 = 0.f; - -doublereal slamch_(char *cmach) -{ - /* Initialized data */ - - static logical first = TRUE_; - - /* System generated locals */ - integer i__1; - real ret_val; - - /* Builtin functions */ - double pow_ri(real *, integer *); - - /* Local variables */ - static real t; - integer it; - static real rnd, eps, base; - integer beta; - static real emin, prec, emax; - integer imin, imax; - logical lrnd; - static real rmin, rmax; - real rmach; - extern logical lsame_(char *, char *); - real small; - static real sfmin; - extern /* Subroutine */ int slamc2_(integer *, integer *, logical *, real - *, integer *, real *, integer *, real *); - - -/* -- LAPACK auxiliary routine (version 3.1) -- */ -/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ -/* November 2006 */ - -/* .. Scalar Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* SLAMCH determines single precision machine parameters. */ - -/* Arguments */ -/* ========= */ - -/* CMACH (input) CHARACTER*1 */ -/* Specifies the value to be returned by SLAMCH: */ -/* = 'E' or 'e', SLAMCH := eps */ -/* = 'S' or 's , SLAMCH := sfmin */ -/* = 'B' or 'b', SLAMCH := base */ -/* = 'P' or 'p', SLAMCH := eps*base */ -/* = 'N' or 'n', SLAMCH := t */ -/* = 'R' or 'r', SLAMCH := rnd */ -/* = 'M' or 'm', SLAMCH := emin */ -/* = 'U' or 'u', SLAMCH := rmin */ -/* = 'L' or 'l', SLAMCH := emax */ -/* = 'O' or 'o', SLAMCH := rmax */ - -/* where */ - -/* eps = relative machine precision */ -/* sfmin = safe minimum, such that 1/sfmin does not overflow */ -/* base = base of the machine */ -/* prec = eps*base */ -/* t = number of (base) digits in the mantissa */ -/* rnd = 1.0 when rounding occurs in addition, 0.0 otherwise */ -/* emin = minimum exponent before (gradual) underflow */ -/* rmin = underflow threshold - base**(emin-1) */ -/* emax = largest exponent before overflow */ -/* rmax = overflow threshold - (base**emax)*(1-eps) */ - -/* ===================================================================== */ - -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. External Subroutines .. */ -/* .. */ -/* .. Save statement .. */ -/* .. */ -/* .. Data statements .. */ -/* .. */ -/* .. Executable Statements .. */ - - if (first) { - slamc2_(&beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax); - base = (real) beta; - t = (real) it; - if (lrnd) { - rnd = 1.f; - i__1 = 1 - it; - eps = pow_ri(&base, &i__1) / 2; - } else { - rnd = 0.f; - i__1 = 1 - it; - eps = pow_ri(&base, &i__1); - } - prec = eps * base; - emin = (real) imin; - emax = (real) imax; - sfmin = rmin; - small = 1.f / rmax; - if (small >= sfmin) { - -/* Use SMALL plus a bit, to avoid the possibility of rounding */ -/* causing overflow when computing 1/sfmin. */ - - sfmin = small * (eps + 1.f); - } - } - - if (lsame_(cmach, "E")) { - rmach = eps; - } else if (lsame_(cmach, "S")) { - rmach = sfmin; - } else if (lsame_(cmach, "B")) { - rmach = base; - } else if (lsame_(cmach, "P")) { - rmach = prec; - } else if (lsame_(cmach, "N")) { - rmach = t; - } else if (lsame_(cmach, "R")) { - rmach = rnd; - } else if (lsame_(cmach, "M")) { - rmach = emin; - } else if (lsame_(cmach, "U")) { - rmach = rmin; - } else if (lsame_(cmach, "L")) { - rmach = emax; - } else if (lsame_(cmach, "O")) { - rmach = rmax; - } - - ret_val = rmach; - first = FALSE_; - return ret_val; - -/* End of SLAMCH */ - -} /* slamch_ */ - - -/* *********************************************************************** */ - -/* Subroutine */ int slamc1_(integer *beta, integer *t, logical *rnd, logical - *ieee1) -{ - /* Initialized data */ - - static logical first = TRUE_; - - /* System generated locals */ - real r__1, r__2; - - /* Local variables */ - real a, b, c__, f, t1, t2; - static integer lt; - real one, qtr; - static logical lrnd; - static integer lbeta; - real savec; - static logical lieee1; - extern doublereal slamc3_(real *, real *); - - -/* -- LAPACK auxiliary routine (version 3.1) -- */ -/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ -/* November 2006 */ - -/* .. Scalar Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* SLAMC1 determines the machine parameters given by BETA, T, RND, and */ -/* IEEE1. */ - -/* Arguments */ -/* ========= */ - -/* BETA (output) INTEGER */ -/* The base of the machine. */ - -/* T (output) INTEGER */ -/* The number of ( BETA ) digits in the mantissa. */ - -/* RND (output) LOGICAL */ -/* Specifies whether proper rounding ( RND = .TRUE. ) or */ -/* chopping ( RND = .FALSE. ) occurs in addition. This may not */ -/* be a reliable guide to the way in which the machine performs */ -/* its arithmetic. */ - -/* IEEE1 (output) LOGICAL */ -/* Specifies whether rounding appears to be done in the IEEE */ -/* 'round to nearest' style. */ - -/* Further Details */ -/* =============== */ - -/* The routine is based on the routine ENVRON by Malcolm and */ -/* incorporates suggestions by Gentleman and Marovich. See */ - -/* Malcolm M. A. (1972) Algorithms to reveal properties of */ -/* floating-point arithmetic. Comms. of the ACM, 15, 949-951. */ - -/* Gentleman W. M. and Marovich S. B. (1974) More on algorithms */ -/* that reveal properties of floating point arithmetic units. */ -/* Comms. of the ACM, 17, 276-277. */ - -/* ===================================================================== */ - -/* .. Local Scalars .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. Save statement .. */ -/* .. */ -/* .. Data statements .. */ -/* .. */ -/* .. Executable Statements .. */ - - if (first) { - one = 1.f; - -/* LBETA, LIEEE1, LT and LRND are the local values of BETA, */ -/* IEEE1, T and RND. */ - -/* Throughout this routine we use the function SLAMC3 to ensure */ -/* that relevant values are stored and not held in registers, or */ -/* are not affected by optimizers. */ - -/* Compute a = 2.0**m with the smallest positive integer m such */ -/* that */ - -/* fl( a + 1.0 ) = a. */ - - a = 1.f; - c__ = 1.f; - -/* + WHILE( C.EQ.ONE )LOOP */ -L10: - if (c__ == one) { - a *= 2; - c__ = slamc3_(&a, &one); - r__1 = -a; - c__ = slamc3_(&c__, &r__1); - goto L10; - } -/* + END WHILE */ - -/* Now compute b = 2.0**m with the smallest positive integer m */ -/* such that */ - -/* fl( a + b ) .gt. a. */ - - b = 1.f; - c__ = slamc3_(&a, &b); - -/* + WHILE( C.EQ.A )LOOP */ -L20: - if (c__ == a) { - b *= 2; - c__ = slamc3_(&a, &b); - goto L20; - } -/* + END WHILE */ - -/* Now compute the base. a and c are neighbouring floating point */ -/* numbers in the interval ( beta**t, beta**( t + 1 ) ) and so */ -/* their difference is beta. Adding 0.25 to c is to ensure that it */ -/* is truncated to beta and not ( beta - 1 ). */ - - qtr = one / 4; - savec = c__; - r__1 = -a; - c__ = slamc3_(&c__, &r__1); - lbeta = c__ + qtr; - -/* Now determine whether rounding or chopping occurs, by adding a */ -/* bit less than beta/2 and a bit more than beta/2 to a. */ - - b = (real) lbeta; - r__1 = b / 2; - r__2 = -b / 100; - f = slamc3_(&r__1, &r__2); - c__ = slamc3_(&f, &a); - if (c__ == a) { - lrnd = TRUE_; - } else { - lrnd = FALSE_; - } - r__1 = b / 2; - r__2 = b / 100; - f = slamc3_(&r__1, &r__2); - c__ = slamc3_(&f, &a); - if (lrnd && c__ == a) { - lrnd = FALSE_; - } - -/* Try and decide whether rounding is done in the IEEE 'round to */ -/* nearest' style. B/2 is half a unit in the last place of the two */ -/* numbers A and SAVEC. Furthermore, A is even, i.e. has last bit */ -/* zero, and SAVEC is odd. Thus adding B/2 to A should not change */ -/* A, but adding B/2 to SAVEC should change SAVEC. */ - - r__1 = b / 2; - t1 = slamc3_(&r__1, &a); - r__1 = b / 2; - t2 = slamc3_(&r__1, &savec); - lieee1 = t1 == a && t2 > savec && lrnd; - -/* Now find the mantissa, t. It should be the integer part of */ -/* log to the base beta of a, however it is safer to determine t */ -/* by powering. So we find t as the smallest positive integer for */ -/* which */ - -/* fl( beta**t + 1.0 ) = 1.0. */ - - lt = 0; - a = 1.f; - c__ = 1.f; - -/* + WHILE( C.EQ.ONE )LOOP */ -L30: - if (c__ == one) { - ++lt; - a *= lbeta; - c__ = slamc3_(&a, &one); - r__1 = -a; - c__ = slamc3_(&c__, &r__1); - goto L30; - } -/* + END WHILE */ - - } - - *beta = lbeta; - *t = lt; - *rnd = lrnd; - *ieee1 = lieee1; - first = FALSE_; - return 0; - -/* End of SLAMC1 */ - -} /* slamc1_ */ - - -/* *********************************************************************** */ - -/* Subroutine */ int slamc2_(integer *beta, integer *t, logical *rnd, real * - eps, integer *emin, real *rmin, integer *emax, real *rmax) -{ - /* Initialized data */ - - static logical first = TRUE_; - static logical iwarn = FALSE_; - - /* Format strings */ - static char fmt_9999[] = "(//\002 WARNING. The value EMIN may be incorre" - "ct:-\002,\002 EMIN = \002,i8,/\002 If, after inspection, the va" - "lue EMIN looks\002,\002 acceptable please comment out \002,/\002" - " the IF block as marked within the code of routine\002,\002 SLAM" - "C2,\002,/\002 otherwise supply EMIN explicitly.\002,/)"; - - /* System generated locals */ - integer i__1; - real r__1, r__2, r__3, r__4, r__5; - - /* Builtin functions */ - double pow_ri(real *, integer *); - //integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); - - /* Local variables */ - real a, b, c__; - integer i__; - static integer lt; - real one, two; - logical ieee; - real half; - logical lrnd; - static real leps; - real zero; - static integer lbeta; - real rbase; - static integer lemin, lemax; - integer gnmin; - real small; - integer gpmin; - real third; - static real lrmin, lrmax; - real sixth; - logical lieee1; - extern /* Subroutine */ int slamc1_(integer *, integer *, logical *, - logical *); - extern doublereal slamc3_(real *, real *); - extern /* Subroutine */ int slamc4_(integer *, real *, integer *), - slamc5_(integer *, integer *, integer *, logical *, integer *, - real *); - integer ngnmin, ngpmin; - - /* Fortran I/O blocks */ - static cilist io___58 = { 0, 6, 0, fmt_9999, 0 }; - - - -/* -- LAPACK auxiliary routine (version 3.1) -- */ -/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ -/* November 2006 */ - -/* .. Scalar Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* SLAMC2 determines the machine parameters specified in its argument */ -/* list. */ - -/* Arguments */ -/* ========= */ - -/* BETA (output) INTEGER */ -/* The base of the machine. */ - -/* T (output) INTEGER */ -/* The number of ( BETA ) digits in the mantissa. */ - -/* RND (output) LOGICAL */ -/* Specifies whether proper rounding ( RND = .TRUE. ) or */ -/* chopping ( RND = .FALSE. ) occurs in addition. This may not */ -/* be a reliable guide to the way in which the machine performs */ -/* its arithmetic. */ - -/* EPS (output) REAL */ -/* The smallest positive number such that */ - -/* fl( 1.0 - EPS ) .LT. 1.0, */ - -/* where fl denotes the computed value. */ - -/* EMIN (output) INTEGER */ -/* The minimum exponent before (gradual) underflow occurs. */ - -/* RMIN (output) REAL */ -/* The smallest normalized number for the machine, given by */ -/* BASE**( EMIN - 1 ), where BASE is the floating point value */ -/* of BETA. */ - -/* EMAX (output) INTEGER */ -/* The maximum exponent before overflow occurs. */ - -/* RMAX (output) REAL */ -/* The largest positive number for the machine, given by */ -/* BASE**EMAX * ( 1 - EPS ), where BASE is the floating point */ -/* value of BETA. */ - -/* Further Details */ -/* =============== */ - -/* The computation of EPS is based on a routine PARANOIA by */ -/* W. Kahan of the University of California at Berkeley. */ - -/* ===================================================================== */ - -/* .. Local Scalars .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. External Subroutines .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ -/* .. Save statement .. */ -/* .. */ -/* .. Data statements .. */ -/* .. */ -/* .. Executable Statements .. */ - - if (first) { - zero = 0.f; - one = 1.f; - two = 2.f; - -/* LBETA, LT, LRND, LEPS, LEMIN and LRMIN are the local values of */ -/* BETA, T, RND, EPS, EMIN and RMIN. */ - -/* Throughout this routine we use the function SLAMC3 to ensure */ -/* that relevant values are stored and not held in registers, or */ -/* are not affected by optimizers. */ - -/* SLAMC1 returns the parameters LBETA, LT, LRND and LIEEE1. */ - - slamc1_(&lbeta, <, &lrnd, &lieee1); - -/* Start to find EPS. */ - - b = (real) lbeta; - i__1 = -lt; - a = pow_ri(&b, &i__1); - leps = a; - -/* Try some tricks to see whether or not this is the correct EPS. */ - - b = two / 3; - half = one / 2; - r__1 = -half; - sixth = slamc3_(&b, &r__1); - third = slamc3_(&sixth, &sixth); - r__1 = -half; - b = slamc3_(&third, &r__1); - b = slamc3_(&b, &sixth); - b = dabs(b); - if (b < leps) { - b = leps; - } - - leps = 1.f; - -/* + WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP */ -L10: - if (leps > b && b > zero) { - leps = b; - r__1 = half * leps; -/* Computing 5th power */ - r__3 = two, r__4 = r__3, r__3 *= r__3; -/* Computing 2nd power */ - r__5 = leps; - r__2 = r__4 * (r__3 * r__3) * (r__5 * r__5); - c__ = slamc3_(&r__1, &r__2); - r__1 = -c__; - c__ = slamc3_(&half, &r__1); - b = slamc3_(&half, &c__); - r__1 = -b; - c__ = slamc3_(&half, &r__1); - b = slamc3_(&half, &c__); - goto L10; - } -/* + END WHILE */ - - if (a < leps) { - leps = a; - } - -/* Computation of EPS complete. */ - -/* Now find EMIN. Let A = + or - 1, and + or - (1 + BASE**(-3)). */ -/* Keep dividing A by BETA until (gradual) underflow occurs. This */ -/* is detected when we cannot recover the previous A. */ - - rbase = one / lbeta; - small = one; - for (i__ = 1; i__ <= 3; ++i__) { - r__1 = small * rbase; - small = slamc3_(&r__1, &zero); -/* L20: */ - } - a = slamc3_(&one, &small); - slamc4_(&ngpmin, &one, &lbeta); - r__1 = -one; - slamc4_(&ngnmin, &r__1, &lbeta); - slamc4_(&gpmin, &a, &lbeta); - r__1 = -a; - slamc4_(&gnmin, &r__1, &lbeta); - ieee = FALSE_; - - if (ngpmin == ngnmin && gpmin == gnmin) { - if (ngpmin == gpmin) { - lemin = ngpmin; -/* ( Non twos-complement machines, no gradual underflow; */ -/* e.g., VAX ) */ - } else if (gpmin - ngpmin == 3) { - lemin = ngpmin - 1 + lt; - ieee = TRUE_; -/* ( Non twos-complement machines, with gradual underflow; */ -/* e.g., IEEE standard followers ) */ - } else { - lemin = min(ngpmin,gpmin); -/* ( A guess; no known machine ) */ - iwarn = TRUE_; - } - - } else if (ngpmin == gpmin && ngnmin == gnmin) { - if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1) { - lemin = max(ngpmin,ngnmin); -/* ( Twos-complement machines, no gradual underflow; */ -/* e.g., CYBER 205 ) */ - } else { - lemin = min(ngpmin,ngnmin); -/* ( A guess; no known machine ) */ - iwarn = TRUE_; - } - - } else if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1 && gpmin == gnmin) - { - if (gpmin - min(ngpmin,ngnmin) == 3) { - lemin = max(ngpmin,ngnmin) - 1 + lt; -/* ( Twos-complement machines with gradual underflow; */ -/* no known machine ) */ - } else { - lemin = min(ngpmin,ngnmin); -/* ( A guess; no known machine ) */ - iwarn = TRUE_; - } - - } else { -/* Computing MIN */ - i__1 = min(ngpmin,ngnmin), i__1 = min(i__1,gpmin); - lemin = min(i__1,gnmin); -/* ( A guess; no known machine ) */ - iwarn = TRUE_; - } - first = FALSE_; -/* ** */ -/* Comment out this if block if EMIN is ok */ - if (iwarn) { - first = TRUE_; - printf("\n\n WARNING. The value EMIN may be incorrect:- "); - printf("EMIN = %8i\n",lemin); - printf("If, after inspection, the value EMIN looks acceptable"); - printf("please comment out \n the IF block as marked within the"); - printf("code of routine SLAMC2, \n otherwise supply EMIN"); - printf("explicitly.\n"); - /* - s_wsfe(&io___58); - do_fio(&c__1, (char *)&lemin, (ftnlen)sizeof(integer)); - e_wsfe(); - */ - } -/* ** */ - -/* Assume IEEE arithmetic if we found denormalised numbers above, */ -/* or if arithmetic seems to round in the IEEE style, determined */ -/* in routine SLAMC1. A true IEEE machine should have both things */ -/* true; however, faulty machines may have one or the other. */ - - ieee = ieee || lieee1; - -/* Compute RMIN by successive division by BETA. We could compute */ -/* RMIN as BASE**( EMIN - 1 ), but some machines underflow during */ -/* this computation. */ - - lrmin = 1.f; - i__1 = 1 - lemin; - for (i__ = 1; i__ <= i__1; ++i__) { - r__1 = lrmin * rbase; - lrmin = slamc3_(&r__1, &zero); -/* L30: */ - } - -/* Finally, call SLAMC5 to compute EMAX and RMAX. */ - - slamc5_(&lbeta, <, &lemin, &ieee, &lemax, &lrmax); - } - - *beta = lbeta; - *t = lt; - *rnd = lrnd; - *eps = leps; - *emin = lemin; - *rmin = lrmin; - *emax = lemax; - *rmax = lrmax; - - return 0; - - -/* End of SLAMC2 */ - -} /* slamc2_ */ - - -/* *********************************************************************** */ - -/* Subroutine */ int slamc4_(integer *emin, real *start, integer *base) -{ - /* System generated locals */ - integer i__1; - real r__1; - - /* Local variables */ - real a; - integer i__; - real b1, b2, c1, c2, d1, d2, one, zero, rbase; - extern doublereal slamc3_(real *, real *); - - -/* -- LAPACK auxiliary routine (version 3.1) -- */ -/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ -/* November 2006 */ - -/* .. Scalar Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* SLAMC4 is a service routine for SLAMC2. */ - -/* Arguments */ -/* ========= */ - -/* EMIN (output) INTEGER */ -/* The minimum exponent before (gradual) underflow, computed by */ -/* setting A = START and dividing by BASE until the previous A */ -/* can not be recovered. */ - -/* START (input) REAL */ -/* The starting point for determining EMIN. */ - -/* BASE (input) INTEGER */ -/* The base of the machine. */ - -/* ===================================================================== */ - -/* .. Local Scalars .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. Executable Statements .. */ - - a = *start; - one = 1.f; - rbase = one / *base; - zero = 0.f; - *emin = 1; - r__1 = a * rbase; - b1 = slamc3_(&r__1, &zero); - c1 = a; - c2 = a; - d1 = a; - d2 = a; -/* + WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND. */ -/* $ ( D1.EQ.A ).AND.( D2.EQ.A ) )LOOP */ -L10: - if (c1 == a && c2 == a && d1 == a && d2 == a) { - --(*emin); - a = b1; - r__1 = a / *base; - b1 = slamc3_(&r__1, &zero); - r__1 = b1 * *base; - c1 = slamc3_(&r__1, &zero); - d1 = zero; - i__1 = *base; - for (i__ = 1; i__ <= i__1; ++i__) { - d1 += b1; -/* L20: */ - } - r__1 = a * rbase; - b2 = slamc3_(&r__1, &zero); - r__1 = b2 / rbase; - c2 = slamc3_(&r__1, &zero); - d2 = zero; - i__1 = *base; - for (i__ = 1; i__ <= i__1; ++i__) { - d2 += b2; -/* L30: */ - } - goto L10; - } -/* + END WHILE */ - - return 0; - -/* End of SLAMC4 */ - -} /* slamc4_ */ - - -/* *********************************************************************** */ - -/* Subroutine */ int slamc5_(integer *beta, integer *p, integer *emin, - logical *ieee, integer *emax, real *rmax) -{ - /* System generated locals */ - integer i__1; - real r__1; - - /* Local variables */ - integer i__; - real y, z__; - integer try__, lexp; - real oldy; - integer uexp, nbits; - extern doublereal slamc3_(real *, real *); - real recbas; - integer exbits, expsum; - - -/* -- LAPACK auxiliary routine (version 3.1) -- */ -/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ -/* November 2006 */ - -/* .. Scalar Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* SLAMC5 attempts to compute RMAX, the largest machine floating-point */ -/* number, without overflow. It assumes that EMAX + abs(EMIN) sum */ -/* approximately to a power of 2. It will fail on machines where this */ -/* assumption does not hold, for example, the Cyber 205 (EMIN = -28625, */ -/* EMAX = 28718). It will also fail if the value supplied for EMIN is */ -/* too large (i.e. too close to zero), probably with overflow. */ - -/* Arguments */ -/* ========= */ - -/* BETA (input) INTEGER */ -/* The base of floating-point arithmetic. */ - -/* P (input) INTEGER */ -/* The number of base BETA digits in the mantissa of a */ -/* floating-point value. */ - -/* EMIN (input) INTEGER */ -/* The minimum exponent before (gradual) underflow. */ - -/* IEEE (input) LOGICAL */ -/* A logical flag specifying whether or not the arithmetic */ -/* system is thought to comply with the IEEE standard. */ - -/* EMAX (output) INTEGER */ -/* The largest exponent before overflow */ - -/* RMAX (output) REAL */ -/* The largest machine floating-point number. */ - -/* ===================================================================== */ - -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ -/* .. Executable Statements .. */ - -/* First compute LEXP and UEXP, two powers of 2 that bound */ -/* abs(EMIN). We then assume that EMAX + abs(EMIN) will sum */ -/* approximately to the bound that is closest to abs(EMIN). */ -/* (EMAX is the exponent of the required number RMAX). */ - - lexp = 1; - exbits = 1; -L10: - try__ = lexp << 1; - if (try__ <= -(*emin)) { - lexp = try__; - ++exbits; - goto L10; - } - if (lexp == -(*emin)) { - uexp = lexp; - } else { - uexp = try__; - ++exbits; - } - -/* Now -LEXP is less than or equal to EMIN, and -UEXP is greater */ -/* than or equal to EMIN. EXBITS is the number of bits needed to */ -/* store the exponent. */ - - if (uexp + *emin > -lexp - *emin) { - expsum = lexp << 1; - } else { - expsum = uexp << 1; - } - -/* EXPSUM is the exponent range, approximately equal to */ -/* EMAX - EMIN + 1 . */ - - *emax = expsum + *emin - 1; - nbits = exbits + 1 + *p; - -/* NBITS is the total number of bits needed to store a */ -/* floating-point number. */ - - if (nbits % 2 == 1 && *beta == 2) { - -/* Either there are an odd number of bits used to store a */ -/* floating-point number, which is unlikely, or some bits are */ -/* not used in the representation of numbers, which is possible, */ -/* (e.g. Cray machines) or the mantissa has an implicit bit, */ -/* (e.g. IEEE machines, Dec Vax machines), which is perhaps the */ -/* most likely. We have to assume the last alternative. */ -/* If this is true, then we need to reduce EMAX by one because */ -/* there must be some way of representing zero in an implicit-bit */ -/* system. On machines like Cray, we are reducing EMAX by one */ -/* unnecessarily. */ - - --(*emax); - } - - if (*ieee) { - -/* Assume we are on an IEEE machine which reserves one exponent */ -/* for infinity and NaN. */ - - --(*emax); - } - -/* Now create RMAX, the largest machine number, which should */ -/* be equal to (1.0 - BETA**(-P)) * BETA**EMAX . */ - -/* First compute 1.0 - BETA**(-P), being careful that the */ -/* result is less than 1.0 . */ - - recbas = 1.f / *beta; - z__ = *beta - 1.f; - y = 0.f; - i__1 = *p; - for (i__ = 1; i__ <= i__1; ++i__) { - z__ *= recbas; - if (y < 1.f) { - oldy = y; - } - y = slamc3_(&y, &z__); -/* L20: */ - } - if (y >= 1.f) { - y = oldy; - } - -/* Now multiply by BETA**EMAX to get RMAX. */ - - i__1 = *emax; - for (i__ = 1; i__ <= i__1; ++i__) { - r__1 = y * *beta; - y = slamc3_(&r__1, &c_b32); -/* L30: */ - } - - *rmax = y; - return 0; - -/* End of SLAMC5 */ - -} /* slamc5_ */ - -#endif diff --git a/3rdparty/lapack/slamch_custom.c b/3rdparty/lapack/slamch_custom.c new file mode 100644 index 000000000..b0e073eed --- /dev/null +++ b/3rdparty/lapack/slamch_custom.c @@ -0,0 +1,88 @@ +#include "clapack.h" +#include +#include + +/* *********************************************************************** */ + +doublereal slamc3_(real *a, real *b) +{ + /* System generated locals */ + real ret_val; + + + /* -- LAPACK auxiliary routine (version 3.1) -- */ + /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ + /* November 2006 */ + + /* .. Scalar Arguments .. */ + /* .. */ + + /* Purpose */ + /* ======= */ + + /* SLAMC3 is intended to force A and B to be stored prior to doing */ + /* the addition of A and B , for use in situations where optimizers */ + /* might hold one of these in a register. */ + + /* Arguments */ + /* ========= */ + + /* A (input) REAL */ + /* B (input) REAL */ + /* The values A and B. */ + + /* ===================================================================== */ + + /* .. Executable Statements .. */ + + ret_val = *a + *b; + + return ret_val; + + /* End of SLAMC3 */ + +} /* slamc3_ */ + + +const unsigned char lapack_toupper_tab[] = +{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, + 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, + 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 123, 124, 125, 126, 127, 128, 129, 130, 131, + 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, + 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, + 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, + 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, + 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, + 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, + 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255 +}; + +/* simpler version of dlamch for the case of IEEE754-compliant FPU module by Piotr Luszczek S. + taken from http://www.mail-archive.com/numpy-discussion@lists.sourceforge.net/msg02448.html */ + +#ifndef FLT_DIGITS +#define FLT_DIGITS 24 +#endif + +const unsigned char lapack_lamch_tab[] = +{ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 3, 4, 5, 6, 7, 0, 8, 9, 0, 10, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 3, 4, 5, 6, 7, 0, 8, 9, + 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +const doublereal lapack_slamch_tab[] = +{ + 0, FLT_RADIX, FLT_EPSILON, FLT_MAX_EXP, FLT_MIN_EXP, FLT_DIGITS, FLT_MAX, + FLT_EPSILON*FLT_RADIX, 1, FLT_MIN*(1 + FLT_EPSILON), FLT_MIN +}; diff --git a/3rdparty/lapack/slartg.c b/3rdparty/lapack/slartg_custom.c similarity index 80% rename from 3rdparty/lapack/slartg.c rename to 3rdparty/lapack/slartg_custom.c index b0b8efa8f..b3b403e4d 100644 --- a/3rdparty/lapack/slartg.c +++ b/3rdparty/lapack/slartg_custom.c @@ -1,15 +1,3 @@ -/* slartg.f -- translated by f2c (version 20061008). - You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - - http://www.netlib.org/f2c/libf2c.zip -*/ - #include "clapack.h" @@ -19,17 +7,13 @@ integer i__1; real r__1, r__2; - /* Builtin functions */ - double log(doublereal), pow_ri(real *, integer *), sqrt(doublereal); - /* Local variables */ integer i__; real f1, g1, eps, scale; integer count; - real safmn2, safmx2; - extern doublereal slamch_(char *); - real safmin; - + static real safmn2, safmx2; + static real safmin; + static logical FIRST = TRUE_; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ @@ -96,15 +80,16 @@ /* .. */ /* .. Executable Statements .. */ -/* IF( FIRST ) THEN */ - safmin = slamch_("S"); - eps = slamch_("E"); - r__1 = slamch_("B"); - i__1 = (integer) (log(safmin / eps) / log(slamch_("B")) / 2.f); - safmn2 = pow_ri(&r__1, &i__1); - safmx2 = 1.f / safmn2; -/* FIRST = .FALSE. */ -/* END IF */ + if(FIRST) + { + safmin = slamch_("S"); + eps = slamch_("E"); + r__1 = slamch_("B"); + i__1 = (integer) (log(safmin / eps) / log(slamch_("B")) / 2.f); + safmn2 = pow_ri(&r__1, &i__1); + safmx2 = 1.f / safmn2; + FIRST = FALSE_; + } if (*g == 0.f) { *cs = 1.f; *sn = 0.f; diff --git a/3rdparty/lapack/slasr.c b/3rdparty/lapack/slasr_custom.c similarity index 100% rename from 3rdparty/lapack/slasr.c rename to 3rdparty/lapack/slasr_custom.c