From dec0af8d79430cbea4b7350f73c77681f901a7fd Mon Sep 17 00:00:00 2001
From: Vadim Pisarevsky
Date: Tue, 27 Dec 2011 15:56:17 +0000
Subject: [PATCH] implemented invert(A, B, DECOMP_EIG)

---
 modules/core/src/arithm.cpp  | 30 +++------
 modules/core/src/convert.cpp | 10 +----
 modules/core/src/lapack.cpp  | 71 +++++++++++++++++++++++++-----------
 3 files changed, 57 insertions(+), 54 deletions(-)

diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp
index b5394ae1b..80d3dace2 100644
--- a/modules/core/src/arithm.cpp
+++ b/modules/core/src/arithm.cpp
@@ -2050,18 +2050,13 @@ static void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t ste
     if( code == CMP_GT || code == CMP_LE )
     {
         int m = code == CMP_GT ? 0 : 255;
-        #if CV_SSE2
-        __m128i m128, c128;
-        if( USE_SSE2 ){
-            m128 = code == CMP_GT ? _mm_setzero_si128() : _mm_set1_epi8 (0xff);
-            c128 = _mm_set1_epi8 (0x7f);
-        }
-        #endif
         for( ; size.height--; src1 += step1, src2 += step2, dst += step )
         {
             int x =0;
             #if CV_SSE2
             if( USE_SSE2 ){
+                __m128i m128 = code == CMP_GT ? _mm_setzero_si128() : _mm_set1_epi8 (0xff);
+                __m128i c128 = _mm_set1_epi8 (0x7f);
                 for( ; x <= size.width - 16; x += 16 )
                 {
                     __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
@@ -2085,17 +2080,12 @@ static void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t ste
     else if( code == CMP_EQ || code == CMP_NE )
     {
         int m = code == CMP_EQ ? 0 : 255;
-        #if CV_SSE2
-        __m128i m128;
-        if( USE_SSE2 ){
-            m128 = code == CMP_EQ ? _mm_setzero_si128() : _mm_set1_epi8 (0xff);
-        }
-        #endif
         for( ; size.height--; src1 += step1, src2 += step2, dst += step )
         {
             int x = 0;
             #if CV_SSE2
             if( USE_SSE2 ){
+                __m128i m128 = code == CMP_EQ ? _mm_setzero_si128() : _mm_set1_epi8 (0xff);
                 for( ; x <= size.width - 16; x += 16 )
                 {
                     __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
@@ -2141,17 +2131,12 @@ static void cmp16s(const short* src1, size_t step1, const short* src2, size_t st
     if( code == CMP_GT || code == CMP_LE )
     {
         int m = code == CMP_GT ? 0 : 255;
-        #if CV_SSE2
-        __m128i m128;
-        if( USE_SSE2 ){
-            m128 = code == CMP_GT ? _mm_setzero_si128() : _mm_set1_epi16 (0xffff);
-        }
-        #endif
         for( ; size.height--; src1 += step1, src2 += step2, dst += step )
         {
             int x =0;
             #if CV_SSE2
             if( USE_SSE2){//
+                __m128i m128 = code == CMP_GT ? _mm_setzero_si128() : _mm_set1_epi16 (0xffff);
                 for( ; x <= size.width - 16; x += 16 )
                 {
                     __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
@@ -2184,17 +2169,12 @@ static void cmp16s(const short* src1, size_t step1, const short* src2, size_t st
     else if( code == CMP_EQ || code == CMP_NE )
     {
         int m = code == CMP_EQ ? 0 : 255;
-        #if CV_SSE2
-        __m128i m128;
-        if( USE_SSE2 ){
-            m128 = code == CMP_EQ ? _mm_setzero_si128() : _mm_set1_epi16 (0xffff);
-        }
-        #endif
         for( ; size.height--; src1 += step1, src2 += step2, dst += step )
         {
             int x = 0;
             #if CV_SSE2
             if( USE_SSE2 ){
+                __m128i m128 = code == CMP_EQ ? _mm_setzero_si128() : _mm_set1_epi16 (0xffff);
                 for( ; x <= size.width - 16; x += 16 )
                 {
                     __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
diff --git a/modules/core/src/convert.cpp b/modules/core/src/convert.cpp
index 8bf123325..25418ca19 100644
--- a/modules/core/src/convert.cpp
+++ b/modules/core/src/convert.cpp
@@ -615,20 +615,14 @@ cvtScale_( const short* src, size_t sstep,
     sstep /= sizeof(src[0]);
     dstep /= sizeof(dst[0]);
 
-    #if CV_SSE2
-    __m128 scale128, shift128;
-    if(USE_SSE2){
-        scale128 = _mm_set1_ps (scale);
-        shift128 = _mm_set1_ps (shift);
-    }
-    #endif
-
     for( ; size.height--; src += sstep, dst += dstep )
     {
         int x = 0;
         #if CV_SSE2
         if(USE_SSE2)
         {
+            __m128 scale128 = _mm_set1_ps (scale);
+            __m128 shift128 = _mm_set1_ps (shift);
             for(; x <= size.width - 8; x += 8 )
             {
                 __m128i r0 = _mm_loadl_epi64((const __m128i*)(src + x));
diff --git a/modules/core/src/lapack.cpp b/modules/core/src/lapack.cpp
index 52601b711..6af62fa78 100644
--- a/modules/core/src/lapack.cpp
+++ b/modules/core/src/lapack.cpp
@@ -951,34 +951,61 @@ double cv::invert( InputArray _src, OutputArray _dst, int method )
     bool result = false;
     Mat src = _src.getMat();
     int type = src.type();
-
-    CV_Assert( method == DECOMP_LU || method == DECOMP_CHOLESKY || method == DECOMP_SVD );
-    _dst.create( src.cols, src.rows, type );
-    Mat dst = _dst.getMat();
+    size_t esz = CV_ELEM_SIZE(type);
+    int m = src.rows, n = src.cols;
 
     if( method == DECOMP_SVD )
    {
-        int n = std::min(src.rows, src.cols);
-        SVD svd(src);
-        svd.backSubst(Mat(), dst);
-
+        int nm = std::min(m, n);
+
+        AutoBuffer<uchar> _buf((m*nm + nm + nm*n)*esz + sizeof(double));
+        uchar* buf = alignPtr((uchar*)_buf, (int)esz);
+        Mat u(m, nm, type, buf);
+        Mat w(nm, 1, type, u.data + m*nm*esz);
+        Mat vt(nm, n, type, w.data + nm*esz);
+
+        SVD::compute(src, w, u, vt);
+        SVD::backSubst(w, u, vt, Mat(), _dst);
         return type == CV_32F ?
-            (((float*)svd.w.data)[0] >= FLT_EPSILON ?
-             ((float*)svd.w.data)[n-1]/((float*)svd.w.data)[0] : 0) :
-            (((double*)svd.w.data)[0] >= DBL_EPSILON ?
-             ((double*)svd.w.data)[n-1]/((double*)svd.w.data)[0] : 0);
+            (((float*)w.data)[0] >= FLT_EPSILON ?
+             ((float*)w.data)[n-1]/((float*)w.data)[0] : 0) :
+            (((double*)w.data)[0] >= DBL_EPSILON ?
+             ((double*)w.data)[n-1]/((double*)w.data)[0] : 0);
     }
-
-    CV_Assert( src.rows == src.cols && (type == CV_32F || type == CV_64F));
-    if( src.rows <= 3 )
+    CV_Assert( m == n && (type == CV_32F || type == CV_64F));
+
+    if( method == DECOMP_EIG )
+    {
+        AutoBuffer<uchar> _buf((n*n*2 + n)*esz + sizeof(double));
+        uchar* buf = alignPtr((uchar*)_buf, (int)esz);
+        Mat u(n, n, type, buf);
+        Mat w(n, 1, type, u.data + n*n*esz);
+        Mat vt(n, n, type, w.data + n*esz);
+
+        eigen(src, w, vt);
+        transpose(vt, u);
+        SVD::backSubst(w, u, vt, Mat(), _dst);
+        return type == CV_32F ?
+            (((float*)w.data)[0] >= FLT_EPSILON ?
+             ((float*)w.data)[n-1]/((float*)w.data)[0] : 0) :
+            (((double*)w.data)[0] >= DBL_EPSILON ?
+             ((double*)w.data)[n-1]/((double*)w.data)[0] : 0);
+    }
+
+    CV_Assert( method == DECOMP_LU || method == DECOMP_CHOLESKY );
+
+    _dst.create( n, n, type );
+    Mat dst = _dst.getMat();
+
+    if( n <= 3 )
     {
         uchar* srcdata = src.data;
         uchar* dstdata = dst.data;
         size_t srcstep = src.step;
         size_t dststep = dst.step;
-        if( src.rows == 2 )
+        if( n == 2 )
         {
             if( type == CV_32FC1 )
             {
@@ -1017,7 +1044,7 @@ double cv::invert( InputArray _src, OutputArray _dst, int method )
             }
         }
     }
-    else if( src.rows == 3 )
+    else if( n == 3 )
     {
         if( type == CV_32FC1 )
         {
@@ -1074,7 +1101,7 @@ double cv::invert( InputArray _src, OutputArray _dst, int method )
     }
     else
     {
-        assert( src.rows == 1 );
+        assert( n == 1 );
 
         if( type == CV_32FC1 )
         {
@@ -1100,7 +1127,7 @@ double cv::invert( InputArray _src, OutputArray _dst, int method )
         return result;
     }
 
-    int n = dst.cols, elem_size = CV_ELEM_SIZE(type);
+    int elem_size = CV_ELEM_SIZE(type);
     AutoBuffer<uchar> buf(n*n*elem_size);
     Mat src1(n, n, type, (uchar*)buf);
     src.copyTo(src1);
@@ -1621,7 +1648,8 @@ cvInvert( const CvArr* srcarr, CvArr* dstarr, int method )
 
     CV_Assert( src.type() == dst.type() && src.rows == dst.cols && src.cols == dst.rows );
     return cv::invert( src, dst, method == CV_CHOLESKY ? cv::DECOMP_CHOLESKY :
-        method == CV_SVD || method == CV_SVD_SYM ? cv::DECOMP_SVD : cv::DECOMP_LU );
+        method == CV_SVD ? cv::DECOMP_SVD :
+        method == CV_SVD_SYM ? cv::DECOMP_EIG : cv::DECOMP_LU );
 }
 
 
@@ -1634,7 +1662,8 @@ cvSolve( const CvArr* Aarr, const CvArr* barr, CvArr* xarr, int method )
 
     bool is_normal = (method & CV_NORMAL) != 0;
     method &= ~CV_NORMAL;
     return cv::solve( A, b, x, (method == CV_CHOLESKY ? cv::DECOMP_CHOLESKY :
-        method == CV_SVD || method == CV_SVD_SYM ? cv::DECOMP_SVD :
+        method == CV_SVD ? cv::DECOMP_SVD :
+        method == CV_SVD_SYM ? cv::DECOMP_EIG :
         A.rows > A.cols ? cv::DECOMP_QR : cv::DECOMP_LU) + (is_normal ? cv::DECOMP_NORMAL : 0) );
 }
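
Note (not part of the patch): a minimal usage sketch of the new code path, for review convenience only. It assumes a build with this patch applied; the matrix values and the side-by-side DECOMP_SVD call are illustrative assumptions, not anything the patch itself contains. The DECOMP_EIG branch goes through eigen(), so the input must be a symmetric CV_32F or CV_64F matrix; as in the SVD branch, the return value is the ratio of the smallest to the largest eigenvalue, or 0 if the matrix is singular. Via the C API, CV_SVD_SYM now maps to DECOMP_EIG in cvInvert/cvSolve.

// Usage sketch (illustrative; assumes this patch is applied).
#include <opencv2/core/core.hpp>
#include <cstdio>

int main()
{
    // Symmetric positive-definite input, as eigen()/DECOMP_EIG requires.
    cv::Mat A = (cv::Mat_<double>(3, 3) <<
                 4.0, 1.0, 0.5,
                 1.0, 3.0, 0.2,
                 0.5, 0.2, 2.0);

    cv::Mat Ainv_eig, Ainv_svd;
    double r_eig = cv::invert(A, Ainv_eig, cv::DECOMP_EIG);  // new branch
    double r_svd = cv::invert(A, Ainv_svd, cv::DECOMP_SVD);  // existing branch

    // Both ratios measure conditioning; 0 would indicate a singular matrix.
    printf("eig ratio = %g, svd ratio = %g\n", r_eig, r_svd);
    printf("||A*inv(A) - I|| = %g\n",
           cv::norm(A*Ainv_eig - cv::Mat::eye(3, 3, CV_64F)));
    return 0;
}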