fixed 2 bugs in the recently modified Lapack functions
This commit is contained in:
parent
e65234b8be
commit
bf94db5b16
9
3rdparty/lapack/dgemv_custom.c
vendored
9
3rdparty/lapack/dgemv_custom.c
vendored
@ -181,11 +181,14 @@
|
||||
if( s == 0. )
|
||||
continue;
|
||||
s *= alpha;
|
||||
for( j = 0; j <= m - 2; j += 2 )
|
||||
for( j = 0; j <= m - 4; j += 4 )
|
||||
{
|
||||
doublereal t0 = y[j] + s*a[j];
|
||||
doublereal t1 = y[j+1] + s*a[j+1];
|
||||
y[j] = t0; y[j+1] = t1;
|
||||
t0 = y[j+2] + s*a[j+2];
|
||||
t1 = y[j+3] + s*a[j+3];
|
||||
y[j+2] = t0; y[j+3] = t1;
|
||||
}
|
||||
|
||||
for( ; j < m; j++ )
|
||||
@ -212,8 +215,8 @@
|
||||
for( i = 0; i < n; i++, a += lda )
|
||||
{
|
||||
doublereal s = 0;
|
||||
for( j = 0; j <= m - 2; j += 2 )
|
||||
s += x[j]*a[j] + x[j+1]*a[j+1];
|
||||
for( j = 0; j <= m - 4; j += 4 )
|
||||
s += x[j]*a[j] + x[j+1]*a[j+1] + x[j+2]*a[j+2] + x[j+3]*a[j+3];
|
||||
for( ; j < m; j++ )
|
||||
s += x[j]*a[j];
|
||||
y[i*incy] += alpha*s;
|
||||
|
74
3rdparty/lapack/sgemv_custom.c
vendored
74
3rdparty/lapack/sgemv_custom.c
vendored
@ -163,37 +163,65 @@
|
||||
;
|
||||
else if( trans == 'N' )
|
||||
{
|
||||
for( i = 0; i < n; i++, a += lda )
|
||||
if( incy == 1 )
|
||||
{
|
||||
real s = x[i*incx];
|
||||
if( s == 0.f )
|
||||
continue;
|
||||
s *= alpha;
|
||||
|
||||
for( j = 0; j <= m - 4; j += 4 )
|
||||
for( i = 0; i < n; i++, a += lda )
|
||||
{
|
||||
real t0 = y[j] + s*a[j];
|
||||
real t1 = y[j+1] + s*a[j+1];
|
||||
y[j] = t0; y[j+1] = t1;
|
||||
t0 = y[j+2] + s*a[j+2];
|
||||
t1 = y[j+3] + s*a[j+3];
|
||||
y[j+2] = t0; y[j+3] = t1;
|
||||
}
|
||||
real s = x[i*incx];
|
||||
if( s == 0.f )
|
||||
continue;
|
||||
s *= alpha;
|
||||
|
||||
for( j = 0; j <= m - 4; j += 4 )
|
||||
{
|
||||
real t0 = y[j] + s*a[j];
|
||||
real t1 = y[j+1] + s*a[j+1];
|
||||
y[j] = t0; y[j+1] = t1;
|
||||
t0 = y[j+2] + s*a[j+2];
|
||||
t1 = y[j+3] + s*a[j+3];
|
||||
y[j+2] = t0; y[j+3] = t1;
|
||||
}
|
||||
|
||||
for( ; j < m; j++ )
|
||||
y[j] += s*a[j];
|
||||
for( ; j < m; j++ )
|
||||
y[j] += s*a[j];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for( i = 0; i < n; i++, a += lda )
|
||||
{
|
||||
real s = x[i*incx];
|
||||
if( s == 0. )
|
||||
continue;
|
||||
s *= alpha;
|
||||
for( j = 0; j < m; j++ )
|
||||
y[j*incy] += s*a[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for( i = 0; i < n; i++, a += lda )
|
||||
if( incx == 1 )
|
||||
{
|
||||
real s = 0;
|
||||
for( j = 0; j <= m - 4; j += 4 )
|
||||
s += x[j]*a[j] + x[j+1]*a[j+1] + x[j+2]*a[j+2] + x[j+3]*a[j+3];
|
||||
for( ; j < m; j++ )
|
||||
s += x[j]*a[j];
|
||||
y[i*incy] += alpha*s;
|
||||
for( i = 0; i < n; i++, a += lda )
|
||||
{
|
||||
real s = 0;
|
||||
for( j = 0; j <= m - 4; j += 4 )
|
||||
s += x[j]*a[j] + x[j+1]*a[j+1] + x[j+2]*a[j+2] + x[j+3]*a[j+3];
|
||||
for( ; j < m; j++ )
|
||||
s += x[j]*a[j];
|
||||
y[i*incy] += alpha*s;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for( i = 0; i < n; i++, a += lda )
|
||||
{
|
||||
real s = 0;
|
||||
for( j = 0; j < m; j++ )
|
||||
s += x[j*incx]*a[j];
|
||||
y[i*incy] += alpha*s;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1327,6 +1327,7 @@ static void _SVDcompute( const Mat& a, Mat& w, Mat* u, Mat* vt, int flags )
|
||||
if(u) u->release();
|
||||
if(vt) vt->release();
|
||||
u = vt = 0;
|
||||
compute_uv = false;
|
||||
}
|
||||
|
||||
if( compute_uv )
|
||||
@ -1417,13 +1418,13 @@ static void _SVDcompute( const Mat& a, Mat& w, Mat* u, Mat* vt, int flags )
|
||||
if( type == CV_32F )
|
||||
{
|
||||
sgesdd_(mode, &n, &m, (float*)_a.data, &lda, (float*)w.data,
|
||||
(float*)vt->data, &ldv, (float*)u->data, &ldu,
|
||||
vt ? (float*)vt->data : (float*)&v1, &ldv, u ? (float*)u->data : (float*)&u1, &ldu,
|
||||
(float*)(buffer + work_ofs), &lwork, (integer*)(buffer + iwork_ofs), &info );
|
||||
}
|
||||
else
|
||||
{
|
||||
dgesdd_(mode, &n, &m, (double*)_a.data, &lda, (double*)w.data,
|
||||
(double*)vt->data, &ldv, (double*)u->data, &ldu,
|
||||
vt ? (double*)vt->data : &v1, &ldv, u ? (double*)u->data : &u1, &ldu,
|
||||
(double*)(buffer + work_ofs), &lwork, (integer*)(buffer + iwork_ofs), &info );
|
||||
}
|
||||
CV_Assert(info >= 0);
|
||||
|
Loading…
Reference in New Issue
Block a user