From dec0af8d79430cbea4b7350f73c77681f901a7fd Mon Sep 17 00:00:00 2001
From: Vadim Pisarevsky
Date: Tue, 27 Dec 2011 15:56:17 +0000
Subject: [PATCH] implemented invert(A, B, DECOMP_EIG)

---
 modules/core/src/arithm.cpp  | 30 +++------
 modules/core/src/convert.cpp | 10 +----
 modules/core/src/lapack.cpp  | 71 +++++++++++++++++++++++++-----------
 3 files changed, 57 insertions(+), 54 deletions(-)

diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp
index b5394ae1b..80d3dace2 100644
--- a/modules/core/src/arithm.cpp
+++ b/modules/core/src/arithm.cpp
@@ -2050,18 +2050,13 @@ static void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t ste
     if( code == CMP_GT || code == CMP_LE )
     {
         int m = code == CMP_GT ? 0 : 255;
-        #if CV_SSE2
-        __m128i m128, c128;
-        if( USE_SSE2 ){
-            m128 = code == CMP_GT ? _mm_setzero_si128() : _mm_set1_epi8 (0xff);
-            c128 = _mm_set1_epi8 (0x7f);
-        }
-        #endif
         for( ; size.height--; src1 += step1, src2 += step2, dst += step )
         {
             int x =0;
             #if CV_SSE2
             if( USE_SSE2 ){
+                __m128i m128 = code == CMP_GT ? _mm_setzero_si128() : _mm_set1_epi8 (0xff);
+                __m128i c128 = _mm_set1_epi8 (0x7f);
                 for( ; x <= size.width - 16; x += 16 )
                 {
                     __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
@@ -2085,17 +2080,12 @@ static void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t ste
     else if( code == CMP_EQ || code == CMP_NE )
     {
         int m = code == CMP_EQ ? 0 : 255;
-        #if CV_SSE2
-        __m128i m128;
-        if( USE_SSE2 ){
-            m128 = code == CMP_EQ ? _mm_setzero_si128() : _mm_set1_epi8 (0xff);
-        }
-        #endif
         for( ; size.height--; src1 += step1, src2 += step2, dst += step )
         {
             int x = 0;
             #if CV_SSE2
             if( USE_SSE2 ){
+                __m128i m128 = code == CMP_EQ ? _mm_setzero_si128() : _mm_set1_epi8 (0xff);
                 for( ; x <= size.width - 16; x += 16 )
                 {
                     __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
@@ -2141,17 +2131,12 @@ static void cmp16s(const short* src1, size_t step1, const short* src2, size_t st
     if( code == CMP_GT || code == CMP_LE )
     {
         int m = code == CMP_GT ? 0 : 255;
-        #if CV_SSE2
-        __m128i m128;
-        if( USE_SSE2 ){
-            m128 = code == CMP_GT ? _mm_setzero_si128() : _mm_set1_epi16 (0xffff);
-        }
-        #endif
         for( ; size.height--; src1 += step1, src2 += step2, dst += step )
         {
             int x =0;
             #if CV_SSE2
             if( USE_SSE2){//
+                __m128i m128 = code == CMP_GT ? _mm_setzero_si128() : _mm_set1_epi16 (0xffff);
                 for( ; x <= size.width - 16; x += 16 )
                 {
                     __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
@@ -2184,17 +2169,12 @@ static void cmp16s(const short* src1, size_t step1, const short* src2, size_t st
     else if( code == CMP_EQ || code == CMP_NE )
     {
         int m = code == CMP_EQ ? 0 : 255;
-        #if CV_SSE2
-        __m128i m128;
-        if( USE_SSE2 ){
-            m128 = code == CMP_EQ ? _mm_setzero_si128() : _mm_set1_epi16 (0xffff);
-        }
-        #endif
         for( ; size.height--; src1 += step1, src2 += step2, dst += step )
         {
             int x = 0;
             #if CV_SSE2
             if( USE_SSE2 ){
+                __m128i m128 = code == CMP_EQ ? _mm_setzero_si128() : _mm_set1_epi16 (0xffff);
                 for( ; x <= size.width - 16; x += 16 )
                 {
                     __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
diff --git a/modules/core/src/convert.cpp b/modules/core/src/convert.cpp
index 8bf123325..25418ca19 100644
--- a/modules/core/src/convert.cpp
+++ b/modules/core/src/convert.cpp
@@ -615,20 +615,14 @@ cvtScale_( const short* src, size_t sstep,
     sstep /= sizeof(src[0]);
     dstep /= sizeof(dst[0]);
 
-    #if CV_SSE2
-    __m128 scale128, shift128;
-    if(USE_SSE2){
-        scale128 = _mm_set1_ps (scale);
-        shift128 = _mm_set1_ps (shift);
-    }
-    #endif
-
     for( ; size.height--; src += sstep, dst += dstep )
     {
         int x = 0;
         #if CV_SSE2
         if(USE_SSE2)
         {
+            __m128 scale128 = _mm_set1_ps (scale);
+            __m128 shift128 = _mm_set1_ps (shift);
             for(; x <= size.width - 8; x += 8 )
             {
                 __m128i r0 = _mm_loadl_epi64((const __m128i*)(src + x));
diff --git a/modules/core/src/lapack.cpp b/modules/core/src/lapack.cpp
index 52601b711..6af62fa78 100644
--- a/modules/core/src/lapack.cpp
+++ b/modules/core/src/lapack.cpp
@@ -951,34 +951,61 @@ double cv::invert( InputArray _src, OutputArray _dst, int method )
     bool result = false;
     Mat src = _src.getMat();
     int type = src.type();
-
-    CV_Assert( method == DECOMP_LU || method == DECOMP_CHOLESKY || method == DECOMP_SVD );
-    _dst.create( src.cols, src.rows, type );
-    Mat dst = _dst.getMat();
+    size_t esz = CV_ELEM_SIZE(type);
+    int m = src.rows, n = src.cols;
 
     if( method == DECOMP_SVD )
    {
-        int n = std::min(src.rows, src.cols);
-        SVD svd(src);
-        svd.backSubst(Mat(), dst);
-
+        int nm = std::min(m, n);
+
+        AutoBuffer<uchar> _buf((m*nm + nm + nm*n)*esz + sizeof(double));
+        uchar* buf = alignPtr((uchar*)_buf, (int)esz);
+        Mat u(m, nm, type, buf);
+        Mat w(nm, 1, type, u.data + m*nm*esz);
+        Mat vt(nm, n, type, w.data + nm*esz);
+
+        SVD::compute(src, w, u, vt);
+        SVD::backSubst(w, u, vt, Mat(), _dst);
         return type == CV_32F ?
-            (((float*)svd.w.data)[0] >= FLT_EPSILON ?
-             ((float*)svd.w.data)[n-1]/((float*)svd.w.data)[0] : 0) :
-            (((double*)svd.w.data)[0] >= DBL_EPSILON ?
-             ((double*)svd.w.data)[n-1]/((double*)svd.w.data)[0] : 0);
+            (((float*)w.data)[0] >= FLT_EPSILON ?
+             ((float*)w.data)[n-1]/((float*)w.data)[0] : 0) :
+            (((double*)w.data)[0] >= DBL_EPSILON ?
+             ((double*)w.data)[n-1]/((double*)w.data)[0] : 0);
     }
-
-    CV_Assert( src.rows == src.cols && (type == CV_32F || type == CV_64F));
-    if( src.rows <= 3 )
+    CV_Assert( m == n && (type == CV_32F || type == CV_64F));
+
+    if( method == DECOMP_EIG )
+    {
+        AutoBuffer<uchar> _buf((n*n*2 + n)*esz + sizeof(double));
+        uchar* buf = alignPtr((uchar*)_buf, (int)esz);
+        Mat u(n, n, type, buf);
+        Mat w(n, 1, type, u.data + n*n*esz);
+        Mat vt(n, n, type, w.data + n*esz);
+
+        eigen(src, w, vt);
+        transpose(vt, u);
+        SVD::backSubst(w, u, vt, Mat(), _dst);
+        return type == CV_32F ?
+            (((float*)w.data)[0] >= FLT_EPSILON ?
+             ((float*)w.data)[n-1]/((float*)w.data)[0] : 0) :
+            (((double*)w.data)[0] >= DBL_EPSILON ?
+             ((double*)w.data)[n-1]/((double*)w.data)[0] : 0);
+    }
+
+    CV_Assert( method == DECOMP_LU || method == DECOMP_CHOLESKY );
+
+    _dst.create( n, n, type );
+    Mat dst = _dst.getMat();
+
+    if( n <= 3 )
     {
         uchar* srcdata = src.data;
         uchar* dstdata = dst.data;
         size_t srcstep = src.step;
         size_t dststep = dst.step;
-        if( src.rows == 2 )
+        if( n == 2 )
         {
             if( type == CV_32FC1 )
             {
@@ -1017,7 +1044,7 @@ double cv::invert( InputArray _src, OutputArray _dst, int method )
             }
         }
     }
-    else if( src.rows == 3 )
+    else if( n == 3 )
     {
         if( type == CV_32FC1 )
         {
@@ -1074,7 +1101,7 @@ double cv::invert( InputArray _src, OutputArray _dst, int method )
     }
     else
     {
-        assert( src.rows == 1 );
+        assert( n == 1 );
 
         if( type == CV_32FC1 )
         {
@@ -1100,7 +1127,7 @@ double cv::invert( InputArray _src, OutputArray _dst, int method )
         return result;
     }
 
-    int n = dst.cols, elem_size = CV_ELEM_SIZE(type);
+    int elem_size = CV_ELEM_SIZE(type);
     AutoBuffer<uchar> buf(n*n*elem_size);
     Mat src1(n, n, type, (uchar*)buf);
     src.copyTo(src1);
@@ -1621,7 +1648,8 @@ cvInvert( const CvArr* srcarr, CvArr* dstarr, int method )
 
     CV_Assert( src.type() == dst.type() && src.rows == dst.cols && src.cols == dst.rows );
     return cv::invert( src, dst, method == CV_CHOLESKY ? cv::DECOMP_CHOLESKY :
-        method == CV_SVD || method == CV_SVD_SYM ? cv::DECOMP_SVD : cv::DECOMP_LU );
+        method == CV_SVD ? cv::DECOMP_SVD :
+        method == CV_SVD_SYM ? cv::DECOMP_EIG : cv::DECOMP_LU );
 }
 
 
@@ -1634,7 +1662,8 @@ cvSolve( const CvArr* Aarr, const CvArr* barr, CvArr* xarr, int method )
 
     bool is_normal = (method & CV_NORMAL) != 0;
     method &= ~CV_NORMAL;
     return cv::solve( A, b, x, (method == CV_CHOLESKY ? cv::DECOMP_CHOLESKY :
-        method == CV_SVD || method == CV_SVD_SYM ? cv::DECOMP_SVD :
+        method == CV_SVD ? cv::DECOMP_SVD :
+        method == CV_SVD_SYM ? cv::DECOMP_EIG :
         A.rows > A.cols ? cv::DECOMP_QR : cv::DECOMP_LU) + (is_normal ? cv::DECOMP_NORMAL : 0) );
 }
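
Note (not part of the patch): a minimal usage sketch of the new code path, for review convenience only. It assumes a build with this patch applied; the matrix values and the side-by-side DECOMP_SVD call are illustrative assumptions, not anything the patch itself contains. The DECOMP_EIG branch goes through eigen(), so the input must be a symmetric CV_32F or CV_64F matrix; as in the SVD branch, the return value is the ratio of the smallest to the largest eigenvalue, or 0 if the matrix is singular. Via the C API, CV_SVD_SYM now maps to DECOMP_EIG in cvInvert/cvSolve.

// Usage sketch (illustrative; assumes this patch is applied).
#include <opencv2/core/core.hpp>
#include <cstdio>

int main()
{
    // Symmetric positive-definite input, as eigen()/DECOMP_EIG requires.
    cv::Mat A = (cv::Mat_<double>(3, 3) <<
                 4.0, 1.0, 0.5,
                 1.0, 3.0, 0.2,
                 0.5, 0.2, 2.0);

    cv::Mat Ainv_eig, Ainv_svd;
    double r_eig = cv::invert(A, Ainv_eig, cv::DECOMP_EIG);  // new branch
    double r_svd = cv::invert(A, Ainv_svd, cv::DECOMP_SVD);  // existing branch

    // Both ratios measure conditioning; 0 would indicate a singular matrix.
    printf("eig ratio = %g, svd ratio = %g\n", r_eig, r_svd);
    printf("||A*inv(A) - I|| = %g\n",
           cv::norm(A*Ainv_eig - cv::Mat::eye(3, 3, CV_64F)));
    return 0;
}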