optimized LAPACK's SVD for noticeably better performance on small matrices

This commit is contained in:
Vadim Pisarevsky
2010-08-30 16:37:22 +00:00
parent fea66d9384
commit e48a456d48
19 changed files with 957 additions and 3620 deletions

View File

@@ -37,11 +37,28 @@ static __inline double r_sign(real *a, real *b)
return *b >= 0 ? x : -x;
}
/* Lookup table indexed by a character converted to unsigned char; it is
   defined in another translation unit. Presumably holds the upper-case
   form of each character -- confirm against its definition. */
extern const unsigned char lapack_toupper_tab[];
/* Table lookup for one character. The argument is cast to unsigned char
   before indexing, so the index is never negative even when plain char
   is signed. The result is converted back to char. */
#define lapack_toupper(c) ((char)lapack_toupper_tab[(unsigned char)(c)])
/* Indexed by the first character of the cmach argument of dlamch_() and
   slamch_(); each entry is used as an index into the two tables below. */
extern const unsigned char lapack_lamch_tab[];
/* Values returned by dlamch_(), selected through lapack_lamch_tab. */
extern const doublereal lapack_dlamch_tab[];
/* Values returned by slamch_(), selected through lapack_lamch_tab.
   The element type is doublereal, the same as slamch_()'s return type. */
extern const doublereal lapack_slamch_tab[];
/* Compares the first characters of ca and cb after mapping each through
   lapack_toupper(). Only ca[0] and cb[0] are read.
   Returns nonzero when the mapped characters are equal, zero otherwise. */
static __inline logical lsame_(char *ca, char *cb)
{
    /* Use the table lookup rather than toupper(): the macro casts its
       argument to unsigned char, whereas passing a plain char with a
       negative value to toupper() is undefined behavior. */
    return lapack_toupper(ca[0]) == lapack_toupper(cb[0]);
}
/* Returns the lapack_dlamch_tab entry selected by the first character of
   cmach. Only cmach[0] is read; it is translated to a table slot through
   lapack_lamch_tab. */
static __inline doublereal dlamch_(char* cmach)
{
    const unsigned char key = (unsigned char)cmach[0];
    const unsigned char slot = lapack_lamch_tab[key];

    return lapack_dlamch_tab[slot];
}
/* Returns the lapack_slamch_tab entry selected by the first character of
   cmach. Only cmach[0] is read; it is translated to a table slot through
   lapack_lamch_tab. */
static __inline doublereal slamch_(char* cmach)
{
    const unsigned char key = (unsigned char)cmach[0];
    const unsigned char slot = lapack_lamch_tab[key];

    return lapack_slamch_tab[slot];
}
static __inline integer i_nint(real *x)
{
return (integer)(*x >= 0 ? floor(*x + .5) : -floor(.5 - *x));

View File

@@ -3680,8 +3680,6 @@ doublereal dsecnd_();
doublereal second_();
doublereal slamch_(char *cmach);
/* Subroutine */ int slamc1_(integer *beta, integer *t, logical *rnd, logical
*ieee1);
@@ -3696,8 +3694,6 @@ doublereal slamc3_(real *a, real *b);
logical *ieee, integer *emax, real *rmax);
doublereal dlamch_(char *cmach);
/* Subroutine */ int dlamc1_(integer *beta, integer *t, logical *rnd, logical
*ieee1);
@@ -3712,9 +3708,6 @@ doublereal dlamc3_(doublereal *a, doublereal *b);
/* Subroutine */ int dlamc5_(integer *beta, integer *p, integer *emin,
logical *ieee, integer *emax, doublereal *rmax);
integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1,
integer *n2, integer *n3, integer *n4);
#ifdef __cplusplus
}
#endif

View File

@@ -7,6 +7,7 @@
#ifndef F2C_INCLUDE
#define F2C_INCLUDE
#include <assert.h>
#include <math.h>
#include <ctype.h>
#include <stdlib.h>
@@ -17,6 +18,10 @@
#include <string.h>
#include <stdio.h>
#if __SSE2__ || defined _M_X64
#include "emmintrin.h"
#endif
#ifdef __cplusplus
extern "C" {
#endif