fixed 2 bugs in the recently modified Lapack functions

This commit is contained in:
Vadim Pisarevsky 2010-08-31 12:39:00 +00:00
parent e65234b8be
commit bf94db5b16
3 changed files with 60 additions and 28 deletions

View File

@ -181,11 +181,14 @@
if( s == 0. )
continue;
s *= alpha;
for( j = 0; j <= m - 2; j += 2 )
for( j = 0; j <= m - 4; j += 4 )
{
doublereal t0 = y[j] + s*a[j];
doublereal t1 = y[j+1] + s*a[j+1];
y[j] = t0; y[j+1] = t1;
t0 = y[j+2] + s*a[j+2];
t1 = y[j+3] + s*a[j+3];
y[j+2] = t0; y[j+3] = t1;
}
for( ; j < m; j++ )
@ -212,8 +215,8 @@
for( i = 0; i < n; i++, a += lda )
{
doublereal s = 0;
for( j = 0; j <= m - 2; j += 2 )
s += x[j]*a[j] + x[j+1]*a[j+1];
for( j = 0; j <= m - 4; j += 4 )
s += x[j]*a[j] + x[j+1]*a[j+1] + x[j+2]*a[j+2] + x[j+3]*a[j+3];
for( ; j < m; j++ )
s += x[j]*a[j];
y[i*incy] += alpha*s;

View File

@ -163,37 +163,65 @@
;
else if( trans == 'N' )
{
for( i = 0; i < n; i++, a += lda )
if( incy == 1 )
{
real s = x[i*incx];
if( s == 0.f )
continue;
s *= alpha;
for( j = 0; j <= m - 4; j += 4 )
for( i = 0; i < n; i++, a += lda )
{
real t0 = y[j] + s*a[j];
real t1 = y[j+1] + s*a[j+1];
y[j] = t0; y[j+1] = t1;
t0 = y[j+2] + s*a[j+2];
t1 = y[j+3] + s*a[j+3];
y[j+2] = t0; y[j+3] = t1;
}
real s = x[i*incx];
if( s == 0.f )
continue;
s *= alpha;
for( j = 0; j <= m - 4; j += 4 )
{
real t0 = y[j] + s*a[j];
real t1 = y[j+1] + s*a[j+1];
y[j] = t0; y[j+1] = t1;
t0 = y[j+2] + s*a[j+2];
t1 = y[j+3] + s*a[j+3];
y[j+2] = t0; y[j+3] = t1;
}
for( ; j < m; j++ )
y[j] += s*a[j];
for( ; j < m; j++ )
y[j] += s*a[j];
}
}
else
{
for( i = 0; i < n; i++, a += lda )
{
real s = x[i*incx];
if( s == 0. )
continue;
s *= alpha;
for( j = 0; j < m; j++ )
y[j*incy] += s*a[j];
}
}
}
else
{
for( i = 0; i < n; i++, a += lda )
if( incx == 1 )
{
real s = 0;
for( j = 0; j <= m - 4; j += 4 )
s += x[j]*a[j] + x[j+1]*a[j+1] + x[j+2]*a[j+2] + x[j+3]*a[j+3];
for( ; j < m; j++ )
s += x[j]*a[j];
y[i*incy] += alpha*s;
for( i = 0; i < n; i++, a += lda )
{
real s = 0;
for( j = 0; j <= m - 4; j += 4 )
s += x[j]*a[j] + x[j+1]*a[j+1] + x[j+2]*a[j+2] + x[j+3]*a[j+3];
for( ; j < m; j++ )
s += x[j]*a[j];
y[i*incy] += alpha*s;
}
}
else
{
for( i = 0; i < n; i++, a += lda )
{
real s = 0;
for( j = 0; j < m; j++ )
s += x[j*incx]*a[j];
y[i*incy] += alpha*s;
}
}
}

View File

@ -1327,6 +1327,7 @@ static void _SVDcompute( const Mat& a, Mat& w, Mat* u, Mat* vt, int flags )
if(u) u->release();
if(vt) vt->release();
u = vt = 0;
compute_uv = false;
}
if( compute_uv )
@ -1417,13 +1418,13 @@ static void _SVDcompute( const Mat& a, Mat& w, Mat* u, Mat* vt, int flags )
if( type == CV_32F )
{
sgesdd_(mode, &n, &m, (float*)_a.data, &lda, (float*)w.data,
(float*)vt->data, &ldv, (float*)u->data, &ldu,
vt ? (float*)vt->data : (float*)&v1, &ldv, u ? (float*)u->data : (float*)&u1, &ldu,
(float*)(buffer + work_ofs), &lwork, (integer*)(buffer + iwork_ofs), &info );
}
else
{
dgesdd_(mode, &n, &m, (double*)_a.data, &lda, (double*)w.data,
(double*)vt->data, &ldv, (double*)u->data, &ldu,
vt ? (double*)vt->data : &v1, &ldv, u ? (double*)u->data : &u1, &ldu,
(double*)(buffer + work_ofs), &lwork, (integer*)(buffer + iwork_ofs), &info );
}
CV_Assert(info >= 0);