diff --git a/3rdparty/lapack/dgemv_custom.c b/3rdparty/lapack/dgemv_custom.c index 6edee4e2b..70b5ef4d2 100644 --- a/3rdparty/lapack/dgemv_custom.c +++ b/3rdparty/lapack/dgemv_custom.c @@ -181,11 +181,14 @@ if( s == 0. ) continue; s *= alpha; - for( j = 0; j <= m - 2; j += 2 ) + for( j = 0; j <= m - 4; j += 4 ) { doublereal t0 = y[j] + s*a[j]; doublereal t1 = y[j+1] + s*a[j+1]; y[j] = t0; y[j+1] = t1; + t0 = y[j+2] + s*a[j+2]; + t1 = y[j+3] + s*a[j+3]; + y[j+2] = t0; y[j+3] = t1; } for( ; j < m; j++ ) @@ -212,8 +215,8 @@ for( i = 0; i < n; i++, a += lda ) { doublereal s = 0; - for( j = 0; j <= m - 2; j += 2 ) - s += x[j]*a[j] + x[j+1]*a[j+1]; + for( j = 0; j <= m - 4; j += 4 ) + s += x[j]*a[j] + x[j+1]*a[j+1] + x[j+2]*a[j+2] + x[j+3]*a[j+3]; for( ; j < m; j++ ) s += x[j]*a[j]; y[i*incy] += alpha*s; diff --git a/3rdparty/lapack/sgemv_custom.c b/3rdparty/lapack/sgemv_custom.c index 0cb7a9750..eff441863 100644 --- a/3rdparty/lapack/sgemv_custom.c +++ b/3rdparty/lapack/sgemv_custom.c @@ -163,37 +163,65 @@ ; else if( trans == 'N' ) { - for( i = 0; i < n; i++, a += lda ) + if( incy == 1 ) { - real s = x[i*incx]; - if( s == 0.f ) - continue; - s *= alpha; - - for( j = 0; j <= m - 4; j += 4 ) + for( i = 0; i < n; i++, a += lda ) { - real t0 = y[j] + s*a[j]; - real t1 = y[j+1] + s*a[j+1]; - y[j] = t0; y[j+1] = t1; - t0 = y[j+2] + s*a[j+2]; - t1 = y[j+3] + s*a[j+3]; - y[j+2] = t0; y[j+3] = t1; - } + real s = x[i*incx]; + if( s == 0.f ) + continue; + s *= alpha; + + for( j = 0; j <= m - 4; j += 4 ) + { + real t0 = y[j] + s*a[j]; + real t1 = y[j+1] + s*a[j+1]; + y[j] = t0; y[j+1] = t1; + t0 = y[j+2] + s*a[j+2]; + t1 = y[j+3] + s*a[j+3]; + y[j+2] = t0; y[j+3] = t1; + } - for( ; j < m; j++ ) - y[j] += s*a[j]; + for( ; j < m; j++ ) + y[j] += s*a[j]; + } + } + else + { + for( i = 0; i < n; i++, a += lda ) + { + real s = x[i*incx]; + if( s == 0. ) + continue; + s *= alpha; + for( j = 0; j < m; j++ ) + y[j*incy] += s*a[j]; + } } } else { - for( i = 0; i < n; i++, a += lda ) + if( incx == 1 ) { - real s = 0; - for( j = 0; j <= m - 4; j += 4 ) - s += x[j]*a[j] + x[j+1]*a[j+1] + x[j+2]*a[j+2] + x[j+3]*a[j+3]; - for( ; j < m; j++ ) - s += x[j]*a[j]; - y[i*incy] += alpha*s; + for( i = 0; i < n; i++, a += lda ) + { + real s = 0; + for( j = 0; j <= m - 4; j += 4 ) + s += x[j]*a[j] + x[j+1]*a[j+1] + x[j+2]*a[j+2] + x[j+3]*a[j+3]; + for( ; j < m; j++ ) + s += x[j]*a[j]; + y[i*incy] += alpha*s; + } + } + else + { + for( i = 0; i < n; i++, a += lda ) + { + real s = 0; + for( j = 0; j < m; j++ ) + s += x[j*incx]*a[j]; + y[i*incy] += alpha*s; + } } } diff --git a/modules/core/src/lapack.cpp b/modules/core/src/lapack.cpp index 6d9a876ef..7a95fb4ee 100644 --- a/modules/core/src/lapack.cpp +++ b/modules/core/src/lapack.cpp @@ -1327,6 +1327,7 @@ static void _SVDcompute( const Mat& a, Mat& w, Mat* u, Mat* vt, int flags ) if(u) u->release(); if(vt) vt->release(); u = vt = 0; + compute_uv = false; } if( compute_uv ) @@ -1417,13 +1418,13 @@ static void _SVDcompute( const Mat& a, Mat& w, Mat* u, Mat* vt, int flags ) if( type == CV_32F ) { sgesdd_(mode, &n, &m, (float*)_a.data, &lda, (float*)w.data, - (float*)vt->data, &ldv, (float*)u->data, &ldu, + vt ? (float*)vt->data : (float*)&v1, &ldv, u ? (float*)u->data : (float*)&u1, &ldu, (float*)(buffer + work_ofs), &lwork, (integer*)(buffer + iwork_ofs), &info ); } else { dgesdd_(mode, &n, &m, (double*)_a.data, &lda, (double*)w.data, - (double*)vt->data, &ldv, (double*)u->data, &ldu, + vt ? (double*)vt->data : &v1, &ldv, u ? (double*)u->data : &u1, &ldu, (double*)(buffer + work_ofs), &lwork, (integer*)(buffer + iwork_ofs), &info ); } CV_Assert(info >= 0);