opencv/3rdparty/lapack/slasd3.c

438 lines
14 KiB
C
Raw Normal View History

#include "clapack.h"
/* Table of constant values */
static integer c__1 = 1;
static integer c__0 = 0;
static real c_b13 = 1.f;
static real c_b26 = 0.f;
/* Subroutine */ int slasd3_(integer *nl, integer *nr, integer *sqre, integer
*k, real *d__, real *q, integer *ldq, real *dsigma, real *u, integer *
ldu, real *u2, integer *ldu2, real *vt, integer *ldvt, real *vt2,
integer *ldvt2, integer *idxc, integer *ctot, real *z__, integer *
info)
{
/* System generated locals */
integer q_dim1, q_offset, u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1,
vt_offset, vt2_dim1, vt2_offset, i__1, i__2;
real r__1, r__2;
/* Builtin functions */
double sqrt(doublereal), r_sign(real *, real *);
/* Local variables */
integer i__, j, m, n, jc;
real rho;
integer nlp1, nlp2, nrp1;
real temp;
extern doublereal snrm2_(integer *, real *, integer *);
integer ctemp;
extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *,
integer *, real *, real *, integer *, real *, integer *, real *,
real *, integer *);
integer ktemp;
extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *,
integer *);
extern doublereal slamc3_(real *, real *);
extern /* Subroutine */ int slasd4_(integer *, integer *, real *, real *,
real *, real *, real *, real *, integer *), xerbla_(char *,
integer *), slascl_(char *, integer *, integer *, real *,
real *, integer *, integer *, real *, integer *, integer *), slacpy_(char *, integer *, integer *, real *, integer *,
real *, integer *);
/* -- LAPACK auxiliary routine (version 3.1) -- */
/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/* November 2006 */
/* .. Scalar Arguments .. */
/* .. */
/* .. Array Arguments .. */
/* .. */
/* Purpose */
/* ======= */
/* SLASD3 finds all the square roots of the roots of the secular */
/* equation, as defined by the values in D and Z. It makes the */
/* appropriate calls to SLASD4 and then updates the singular */
/* vectors by matrix multiplication. */
/* This code makes very mild assumptions about floating point */
/* arithmetic. It will work on machines with a guard digit in */
/* add/subtract, or on those binary machines without guard digits */
/* which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. */
/* It could conceivably fail on hexadecimal or decimal machines */
/* without guard digits, but we know of none. */
/* SLASD3 is called from SLASD1. */
/* Arguments */
/* ========= */
/* NL (input) INTEGER */
/* The row dimension of the upper block. NL >= 1. */
/* NR (input) INTEGER */
/* The row dimension of the lower block. NR >= 1. */
/* SQRE (input) INTEGER */
/* = 0: the lower block is an NR-by-NR square matrix. */
/* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. */
/* The bidiagonal matrix has N = NL + NR + 1 rows and */
/* M = N + SQRE >= N columns. */
/* K (input) INTEGER */
/* The size of the secular equation, 1 =< K = < N. */
/* D (output) REAL array, dimension(K) */
/* On exit the square roots of the roots of the secular equation, */
/* in ascending order. */
/* Q (workspace) REAL array, */
/* dimension at least (LDQ,K). */
/* LDQ (input) INTEGER */
/* The leading dimension of the array Q. LDQ >= K. */
/* DSIGMA (input/output) REAL array, dimension(K) */
/* The first K elements of this array contain the old roots */
/* of the deflated updating problem. These are the poles */
/* of the secular equation. */
/* U (output) REAL array, dimension (LDU, N) */
/* The last N - K columns of this matrix contain the deflated */
/* left singular vectors. */
/* LDU (input) INTEGER */
/* The leading dimension of the array U. LDU >= N. */
/* U2 (input) REAL array, dimension (LDU2, N) */
/* The first K columns of this matrix contain the non-deflated */
/* left singular vectors for the split problem. */
/* LDU2 (input) INTEGER */
/* The leading dimension of the array U2. LDU2 >= N. */
/* VT (output) REAL array, dimension (LDVT, M) */
/* The last M - K columns of VT' contain the deflated */
/* right singular vectors. */
/* LDVT (input) INTEGER */
/* The leading dimension of the array VT. LDVT >= N. */
/* VT2 (input/output) REAL array, dimension (LDVT2, N) */
/* The first K columns of VT2' contain the non-deflated */
/* right singular vectors for the split problem. */
/* LDVT2 (input) INTEGER */
/* The leading dimension of the array VT2. LDVT2 >= N. */
/* IDXC (input) INTEGER array, dimension (N) */
/* The permutation used to arrange the columns of U (and rows of */
/* VT) into three groups: the first group contains non-zero */
/* entries only at and above (or before) NL +1; the second */
/* contains non-zero entries only at and below (or after) NL+2; */
/* and the third is dense. The first column of U and the row of */
/* VT are treated separately, however. */
/* The rows of the singular vectors found by SLASD4 */
/* must be likewise permuted before the matrix multiplies can */
/* take place. */
/* CTOT (input) INTEGER array, dimension (4) */
/* A count of the total number of the various types of columns */
/* in U (or rows in VT), as described in IDXC. The fourth column */
/* type is any column which has been deflated. */
/* Z (input/output) REAL array, dimension (K) */
/* The first K elements of this array contain the components */
/* of the deflation-adjusted updating row vector. */
/* INFO (output) INTEGER */
/* = 0: successful exit. */
/* < 0: if INFO = -i, the i-th argument had an illegal value. */
/* > 0: if INFO = 1, an singular value did not converge */
/* Further Details */
/* =============== */
/* Based on contributions by */
/* Ming Gu and Huan Ren, Computer Science Division, University of */
/* California at Berkeley, USA */
/* ===================================================================== */
/* .. Parameters .. */
/* .. */
/* .. Local Scalars .. */
/* .. */
/* .. External Functions .. */
/* .. */
/* .. External Subroutines .. */
/* .. */
/* .. Intrinsic Functions .. */
/* .. */
/* .. Executable Statements .. */
/* Test the input parameters. */
/* Parameter adjustments */
--d__;
q_dim1 = *ldq;
q_offset = 1 + q_dim1;
q -= q_offset;
--dsigma;
u_dim1 = *ldu;
u_offset = 1 + u_dim1;
u -= u_offset;
u2_dim1 = *ldu2;
u2_offset = 1 + u2_dim1;
u2 -= u2_offset;
vt_dim1 = *ldvt;
vt_offset = 1 + vt_dim1;
vt -= vt_offset;
vt2_dim1 = *ldvt2;
vt2_offset = 1 + vt2_dim1;
vt2 -= vt2_offset;
--idxc;
--ctot;
--z__;
/* Function Body */
*info = 0;
if (*nl < 1) {
*info = -1;
} else if (*nr < 1) {
*info = -2;
} else if (*sqre != 1 && *sqre != 0) {
*info = -3;
}
n = *nl + *nr + 1;
m = n + *sqre;
nlp1 = *nl + 1;
nlp2 = *nl + 2;
if (*k < 1 || *k > n) {
*info = -4;
} else if (*ldq < *k) {
*info = -7;
} else if (*ldu < n) {
*info = -10;
} else if (*ldu2 < n) {
*info = -12;
} else if (*ldvt < m) {
*info = -14;
} else if (*ldvt2 < m) {
*info = -16;
}
if (*info != 0) {
i__1 = -(*info);
xerbla_("SLASD3", &i__1);
return 0;
}
/* Quick return if possible */
if (*k == 1) {
d__[1] = dabs(z__[1]);
scopy_(&m, &vt2[vt2_dim1 + 1], ldvt2, &vt[vt_dim1 + 1], ldvt);
if (z__[1] > 0.f) {
scopy_(&n, &u2[u2_dim1 + 1], &c__1, &u[u_dim1 + 1], &c__1);
} else {
i__1 = n;
for (i__ = 1; i__ <= i__1; ++i__) {
u[i__ + u_dim1] = -u2[i__ + u2_dim1];
/* L10: */
}
}
return 0;
}
/* Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can */
/* be computed with high relative accuracy (barring over/underflow). */
/* This is a problem on machines without a guard digit in */
/* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). */
/* The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), */
/* which on any of these machines zeros out the bottommost */
/* bit of DSIGMA(I) if it is 1; this makes the subsequent */
/* subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation */
/* occurs. On binary machines with a guard digit (almost all */
/* machines) it does not change DSIGMA(I) at all. On hexadecimal */
/* and decimal machines with a guard digit, it slightly */
/* changes the bottommost bits of DSIGMA(I). It does not account */
/* for hexadecimal or decimal machines without guard digits */
/* (we know of none). We use a subroutine call to compute */
/* 2*DSIGMA(I) to prevent optimizing compilers from eliminating */
/* this code. */
i__1 = *k;
for (i__ = 1; i__ <= i__1; ++i__) {
dsigma[i__] = slamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__];
/* L20: */
}
/* Keep a copy of Z. */
scopy_(k, &z__[1], &c__1, &q[q_offset], &c__1);
/* Normalize Z. */
rho = snrm2_(k, &z__[1], &c__1);
slascl_("G", &c__0, &c__0, &rho, &c_b13, k, &c__1, &z__[1], k, info);
rho *= rho;
/* Find the new singular values. */
i__1 = *k;
for (j = 1; j <= i__1; ++j) {
slasd4_(k, &j, &dsigma[1], &z__[1], &u[j * u_dim1 + 1], &rho, &d__[j],
&vt[j * vt_dim1 + 1], info);
/* If the zero finder fails, the computation is terminated. */
if (*info != 0) {
return 0;
}
/* L30: */
}
/* Compute updated Z. */
i__1 = *k;
for (i__ = 1; i__ <= i__1; ++i__) {
z__[i__] = u[i__ + *k * u_dim1] * vt[i__ + *k * vt_dim1];
i__2 = i__ - 1;
for (j = 1; j <= i__2; ++j) {
z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[
i__] - dsigma[j]) / (dsigma[i__] + dsigma[j]);
/* L40: */
}
i__2 = *k - 1;
for (j = i__; j <= i__2; ++j) {
z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[
i__] - dsigma[j + 1]) / (dsigma[i__] + dsigma[j + 1]);
/* L50: */
}
r__2 = sqrt((r__1 = z__[i__], dabs(r__1)));
z__[i__] = r_sign(&r__2, &q[i__ + q_dim1]);
/* L60: */
}
/* Compute left singular vectors of the modified diagonal matrix, */
/* and store related information for the right singular vectors. */
i__1 = *k;
for (i__ = 1; i__ <= i__1; ++i__) {
vt[i__ * vt_dim1 + 1] = z__[1] / u[i__ * u_dim1 + 1] / vt[i__ *
vt_dim1 + 1];
u[i__ * u_dim1 + 1] = -1.f;
i__2 = *k;
for (j = 2; j <= i__2; ++j) {
vt[j + i__ * vt_dim1] = z__[j] / u[j + i__ * u_dim1] / vt[j + i__
* vt_dim1];
u[j + i__ * u_dim1] = dsigma[j] * vt[j + i__ * vt_dim1];
/* L70: */
}
temp = snrm2_(k, &u[i__ * u_dim1 + 1], &c__1);
q[i__ * q_dim1 + 1] = u[i__ * u_dim1 + 1] / temp;
i__2 = *k;
for (j = 2; j <= i__2; ++j) {
jc = idxc[j];
q[j + i__ * q_dim1] = u[jc + i__ * u_dim1] / temp;
/* L80: */
}
/* L90: */
}
/* Update the left singular vector matrix. */
if (*k == 2) {
sgemm_("N", "N", &n, k, k, &c_b13, &u2[u2_offset], ldu2, &q[q_offset],
ldq, &c_b26, &u[u_offset], ldu);
goto L100;
}
if (ctot[1] > 0) {
sgemm_("N", "N", nl, k, &ctot[1], &c_b13, &u2[(u2_dim1 << 1) + 1],
ldu2, &q[q_dim1 + 2], ldq, &c_b26, &u[u_dim1 + 1], ldu);
if (ctot[3] > 0) {
ktemp = ctot[1] + 2 + ctot[2];
sgemm_("N", "N", nl, k, &ctot[3], &c_b13, &u2[ktemp * u2_dim1 + 1]
, ldu2, &q[ktemp + q_dim1], ldq, &c_b13, &u[u_dim1 + 1],
ldu);
}
} else if (ctot[3] > 0) {
ktemp = ctot[1] + 2 + ctot[2];
sgemm_("N", "N", nl, k, &ctot[3], &c_b13, &u2[ktemp * u2_dim1 + 1],
ldu2, &q[ktemp + q_dim1], ldq, &c_b26, &u[u_dim1 + 1], ldu);
} else {
slacpy_("F", nl, k, &u2[u2_offset], ldu2, &u[u_offset], ldu);
}
scopy_(k, &q[q_dim1 + 1], ldq, &u[nlp1 + u_dim1], ldu);
ktemp = ctot[1] + 2;
ctemp = ctot[2] + ctot[3];
sgemm_("N", "N", nr, k, &ctemp, &c_b13, &u2[nlp2 + ktemp * u2_dim1], ldu2,
&q[ktemp + q_dim1], ldq, &c_b26, &u[nlp2 + u_dim1], ldu);
/* Generate the right singular vectors. */
L100:
i__1 = *k;
for (i__ = 1; i__ <= i__1; ++i__) {
temp = snrm2_(k, &vt[i__ * vt_dim1 + 1], &c__1);
q[i__ + q_dim1] = vt[i__ * vt_dim1 + 1] / temp;
i__2 = *k;
for (j = 2; j <= i__2; ++j) {
jc = idxc[j];
q[i__ + j * q_dim1] = vt[jc + i__ * vt_dim1] / temp;
/* L110: */
}
/* L120: */
}
/* Update the right singular vector matrix. */
if (*k == 2) {
sgemm_("N", "N", k, &m, k, &c_b13, &q[q_offset], ldq, &vt2[vt2_offset]
, ldvt2, &c_b26, &vt[vt_offset], ldvt);
return 0;
}
ktemp = ctot[1] + 1;
sgemm_("N", "N", k, &nlp1, &ktemp, &c_b13, &q[q_dim1 + 1], ldq, &vt2[
vt2_dim1 + 1], ldvt2, &c_b26, &vt[vt_dim1 + 1], ldvt);
ktemp = ctot[1] + 2 + ctot[2];
if (ktemp <= *ldvt2) {
sgemm_("N", "N", k, &nlp1, &ctot[3], &c_b13, &q[ktemp * q_dim1 + 1],
ldq, &vt2[ktemp + vt2_dim1], ldvt2, &c_b13, &vt[vt_dim1 + 1],
ldvt);
}
ktemp = ctot[1] + 1;
nrp1 = *nr + *sqre;
if (ktemp > 1) {
i__1 = *k;
for (i__ = 1; i__ <= i__1; ++i__) {
q[i__ + ktemp * q_dim1] = q[i__ + q_dim1];
/* L130: */
}
i__1 = m;
for (i__ = nlp2; i__ <= i__1; ++i__) {
vt2[ktemp + i__ * vt2_dim1] = vt2[i__ * vt2_dim1 + 1];
/* L140: */
}
}
ctemp = ctot[2] + 1 + ctot[3];
sgemm_("N", "N", k, &nrp1, &ctemp, &c_b13, &q[ktemp * q_dim1 + 1], ldq, &
vt2[ktemp + nlp2 * vt2_dim1], ldvt2, &c_b26, &vt[nlp2 * vt_dim1 +
1], ldvt);
return 0;
/* End of SLASD3 */
} /* slasd3_ */