merged 2.4 into trunk

Vadim Pisarevsky
2012-04-30 14:33:52 +00:00
parent 3f1c6d7357
commit d5a0088bbe
194 changed files with 10158 additions and 8225 deletions

View File

@@ -251,6 +251,41 @@ void Algorithm::set(const char* name, const Ptr<Algorithm>& value)
info()->set(this, name, ParamType<Algorithm>::type, &value);
}
int Algorithm::getInt(const string& name) const
{
return get<int>(name);
}
double Algorithm::getDouble(const string& name) const
{
return get<double>(name);
}
bool Algorithm::getBool(const string& name) const
{
return get<bool>(name);
}
string Algorithm::getString(const string& name) const
{
return get<string>(name);
}
Mat Algorithm::getMat(const string& name) const
{
return get<Mat>(name);
}
vector<Mat> Algorithm::getMatVector(const string& name) const
{
return get<vector<Mat> >(name);
}
Ptr<Algorithm> Algorithm::getAlgorithm(const string& name) const
{
return get<Algorithm>(name);
}
string Algorithm::paramHelp(const string& name) const
{
return info()->paramHelp(name.c_str());
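The typed getters added in this hunk (getInt, getDouble, getBool, getString, getMat, getMatVector, getAlgorithm) are thin wrappers over the templated get<T>(). A minimal usage sketch, assuming an OpenCV 2.4-style build; the registry name "Feature2D.ORB" and the parameter names "nFeatures"/"scaleFactor" are illustrative assumptions, not taken from this diff:

#include "opencv2/core/core.hpp"
#include "opencv2/features2d/features2d.hpp"
#include <iostream>

int main()
{
    // create() looks the string up in the global Algorithm registry
    cv::Ptr<cv::Algorithm> orb = cv::Algorithm::create<cv::Algorithm>("Feature2D.ORB");
    if( orb.empty() )
        return 1;                                   // name not registered in this build
    // equivalent to orb->get<int>("nFeatures") and orb->get<double>("scaleFactor")
    std::cout << orb->getInt("nFeatures") << " "
              << orb->getDouble("scaleFactor") << std::endl;
    return 0;
}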
@@ -296,9 +331,9 @@ AlgorithmInfo::~AlgorithmInfo()
void AlgorithmInfo::write(const Algorithm* algo, FileStorage& fs) const
{
size_t i = 0, n = data->params.vec.size();
size_t i = 0, nparams = data->params.vec.size();
cv::write(fs, "name", algo->name());
for( i = 0; i < n; i++ )
for( i = 0; i < nparams; i++ )
{
const Param& p = data->params.vec[i].second;
const string& pname = data->params.vec[i].first;
@@ -327,9 +362,10 @@ void AlgorithmInfo::write(const Algorithm* algo, FileStorage& fs) const
void AlgorithmInfo::read(Algorithm* algo, const FileNode& fn) const
{
size_t i = 0, n = data->params.vec.size();
size_t i = 0, nparams = data->params.vec.size();
AlgorithmInfo* info = algo->info();
for( i = 0; i < n; i++ )
for( i = 0; i < nparams; i++ )
{
const Param& p = data->params.vec[i].second;
const string& pname = data->params.vec[i].first;
@@ -337,31 +373,43 @@ void AlgorithmInfo::read(Algorithm* algo, const FileNode& fn) const
if( n.empty() )
continue;
if( p.type == Param::INT )
algo->set(pname, (int)n);
{
int val = (int)n;
info->set(algo, pname.c_str(), p.type, &val, true);
}
else if( p.type == Param::BOOLEAN )
algo->set(pname, (int)n != 0);
{
bool val = (int)n != 0;
info->set(algo, pname.c_str(), p.type, &val, true);
}
else if( p.type == Param::REAL )
algo->set(pname, (double)n);
{
double val = (double)n;
info->set(algo, pname.c_str(), p.type, &val, true);
}
else if( p.type == Param::STRING )
algo->set(pname, (string)n);
{
string val = (string)n;
info->set(algo, pname.c_str(), p.type, &val, true);
}
else if( p.type == Param::MAT )
{
Mat m;
cv::read(n, m);
algo->set(pname, m);
info->set(algo, pname.c_str(), p.type, &m, true);
}
else if( p.type == Param::MAT_VECTOR )
{
vector<Mat> mv;
cv::read(n, mv);
algo->set(pname, mv);
info->set(algo, pname.c_str(), p.type, &mv, true);
}
else if( p.type == Param::ALGORITHM )
{
Ptr<Algorithm> nestedAlgo = Algorithm::_create((string)n["name"]);
CV_Assert( !nestedAlgo.empty() );
nestedAlgo->read(n);
algo->set(pname, nestedAlgo);
info->set(algo, pname.c_str(), p.type, &nestedAlgo, true);
}
else
CV_Error( CV_StsUnsupportedFormat, "unknown/unsupported parameter type");
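The rewrite above routes read() through info->set(..., force=true) instead of algo->set(), so parameters declared readonly can still be restored from a file while ordinary callers of Algorithm::set() keep getting the readonly error. A hedged round-trip sketch (the file name is made up, and algo stands for any registered cv::Algorithm instance):

#include "opencv2/core/core.hpp"

static void saveAndRestore(const cv::Ptr<cv::Algorithm>& algo)
{
    cv::FileStorage out("algo.yml", cv::FileStorage::WRITE);
    algo->write(out);                    // writes "name" plus every registered parameter
    out.release();

    cv::FileStorage in("algo.yml", cv::FileStorage::READ);
    cv::Ptr<cv::Algorithm> restored = cv::Algorithm::_create((std::string)in.root()["name"]);
    if( !restored.empty() )
        restored->read(in.root());       // lands in AlgorithmInfo::read() above,
                                         // so even readonly parameters are filled (force=true)
}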
@@ -391,24 +439,24 @@ union GetSetParam
void (Algorithm::*set_mat_vector)(const vector<Mat>&);
void (Algorithm::*set_algo)(const Ptr<Algorithm>&);
};
void AlgorithmInfo::set(Algorithm* algo, const char* name, int argType, const void* value) const
void AlgorithmInfo::set(Algorithm* algo, const char* name, int argType, const void* value, bool force) const
{
const Param* p = findstr(data->params, name);
if( !p )
CV_Error_( CV_StsBadArg, ("No parameter '%s' is found", name ? name : "<NULL>") );
if( p->readonly )
if( !force && p->readonly )
CV_Error_( CV_StsError, ("Parameter '%s' is readonly", name));
GetSetParam f;
f.set_int = p->setter;
if( argType == Param::INT || argType == Param::BOOLEAN || argType == Param::REAL )
{
CV_Assert( p->type == Param::INT || p->type == Param::REAL || p->type == Param::BOOLEAN );
if( p->type == Param::INT )
{
int val = argType == Param::INT ? *(const int*)value :
@@ -443,7 +491,7 @@ void AlgorithmInfo::set(Algorithm* algo, const char* name, int argType, const vo
else if( argType == Param::STRING )
{
CV_Assert( p->type == Param::STRING );
const string& val = *(const string*)value;
if( p->setter )
(algo->*f.set_string)(val);
@@ -453,7 +501,7 @@ void AlgorithmInfo::set(Algorithm* algo, const char* name, int argType, const vo
else if( argType == Param::MAT )
{
CV_Assert( p->type == Param::MAT );
const Mat& val = *(const Mat*)value;
if( p->setter )
(algo->*f.set_mat)(val);
@@ -463,7 +511,7 @@ void AlgorithmInfo::set(Algorithm* algo, const char* name, int argType, const vo
else if( argType == Param::MAT_VECTOR )
{
CV_Assert( p->type == Param::MAT_VECTOR );
const vector<Mat>& val = *(const vector<Mat>*)value;
if( p->setter )
(algo->*f.set_mat_vector)(val);
@@ -473,7 +521,7 @@ void AlgorithmInfo::set(Algorithm* algo, const char* name, int argType, const vo
else if( argType == Param::ALGORITHM )
{
CV_Assert( p->type == Param::ALGORITHM );
const Ptr<Algorithm>& val = *(const Ptr<Algorithm>*)value;
if( p->setter )
(algo->*f.set_algo)(val);

View File

@@ -2186,6 +2186,28 @@ bool checkRange(InputArray _src, bool quiet, Point* pt, double minVal, double ma
}
void patchNaNs( InputOutputArray _a, double _val )
{
Mat a = _a.getMat();
CV_Assert( a.depth() == CV_32F );
const Mat* arrays[] = {&a, 0};
int* ptrs[1];
NAryMatIterator it(arrays, (uchar**)ptrs);
size_t len = it.size*a.channels();
Cv32suf val;
val.f = (float)_val;
for( size_t i = 0; i < it.nplanes; i++, ++it )
{
int* tptr = ptrs[0];
for( size_t j = 0; j < len; j++ )
if( (tptr[j] & 0x7fffffff) > 0x7f800000 )
tptr[j] = val.i;
}
}
void exp(const float* src, float* dst, int n)
{
Exp_32f(src, dst, n);
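patchNaNs above flags a NaN by masking off the sign bit and checking that the remaining bits exceed the +Inf pattern 0x7f800000, then overwrites it with the bit pattern of the requested value. A small usage sketch (values are illustrative):

#include "opencv2/core/core.hpp"
#include <limits>

int main()
{
    cv::Mat m(2, 2, CV_32F, cv::Scalar::all(1.f));
    m.at<float>(0, 1) = std::numeric_limits<float>::quiet_NaN();
    cv::patchNaNs(m, 0.0);               // every NaN becomes 0.f, finite values are untouched
    return 0;
}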

View File

@@ -176,15 +176,6 @@ struct NoVec
extern volatile bool USE_SSE2;
typedef void (*BinaryFunc)(const uchar* src1, size_t step1,
const uchar* src2, size_t step2,
uchar* dst, size_t step, Size sz,
void*);
BinaryFunc getConvertFunc(int sdepth, int ddepth);
BinaryFunc getConvertScaleFunc(int sdepth, int ddepth);
BinaryFunc getCopyMaskFunc(size_t esz);
enum { BLOCK_SIZE = 1024 };
#ifdef HAVE_IPP

View File

@@ -48,6 +48,10 @@
#include "precomp.hpp"
#if defined __SSE2__ || (defined _M_IX86_FP && 2 == _M_IX86_FP)
#include "emmintrin.h"
#endif
namespace cv
{
@@ -196,33 +200,54 @@ DEF_RANDI_FUNC(8s, schar)
DEF_RANDI_FUNC(16u, ushort)
DEF_RANDI_FUNC(16s, short)
DEF_RANDI_FUNC(32s, int)
static void randf_32f( float* arr, int len, uint64* state, const Vec2f* p, bool )
{
uint64 temp = *state;
int i;
int i = 0;
for( i = 0; i <= len - 4; i += 4 )
for( ; i <= len - 4; i += 4 )
{
float f0, f1;
float f[4];
f[0] = (float)(int)(temp = RNG_NEXT(temp));
f[1] = (float)(int)(temp = RNG_NEXT(temp));
f[2] = (float)(int)(temp = RNG_NEXT(temp));
f[3] = (float)(int)(temp = RNG_NEXT(temp));
temp = RNG_NEXT(temp);
f0 = (int)temp*p[i][0] + p[i][1];
temp = RNG_NEXT(temp);
f1 = (int)temp*p[i+1][0] + p[i+1][1];
arr[i] = f0; arr[i+1] = f1;
// handwritten SSE is required not for performance but for numerical stability!
// both 32-bit gcc and MSVC compilers tend to generate double precision SSE,
// while 64-bit compilers generate single precision SIMD instructions,
// so manual vectorisation forces all compilers to single precision
#if defined __SSE2__ || (defined _M_IX86_FP && 2 == _M_IX86_FP)
__m128 q0 = _mm_loadu_ps((const float*)(p + i));
__m128 q1 = _mm_loadu_ps((const float*)(p + i + 2));
temp = RNG_NEXT(temp);
f0 = (int)temp*p[i+2][0] + p[i+2][1];
temp = RNG_NEXT(temp);
f1 = (int)temp*p[i+3][0] + p[i+3][1];
arr[i+2] = f0; arr[i+3] = f1;
__m128 q01l = _mm_unpacklo_ps(q0, q1);
__m128 q01h = _mm_unpackhi_ps(q0, q1);
__m128 p0 = _mm_unpacklo_ps(q01l, q01h);
__m128 p1 = _mm_unpackhi_ps(q01l, q01h);
_mm_storeu_ps(arr + i, _mm_add_ps(_mm_mul_ps(_mm_loadu_ps(f), p0), p1));
#else
arr[i+0] = f[0]*p[i+0][0] + p[i+0][1];
arr[i+1] = f[1]*p[i+1][0] + p[i+1][1];
arr[i+2] = f[2]*p[i+2][0] + p[i+2][1];
arr[i+3] = f[3]*p[i+3][0] + p[i+3][1];
#endif
}
for( ; i < len; i++ )
{
temp = RNG_NEXT(temp);
#if defined __SSE2__ || (defined _M_IX86_FP && 2 == _M_IX86_FP)
_mm_store_ss(arr + i, _mm_add_ss(
_mm_mul_ss(_mm_set_ss((float)(int)temp), _mm_set_ss(p[i][0])),
_mm_set_ss(p[i][1]))
);
#else
arr[i] = (int)temp*p[i][0] + p[i][1];
#endif
}
*state = temp;
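Each output element of randf_32f is (int)rng * p[i][0] + p[i][1], where the (scale, offset) pairs in p are prepared by RNG::fill for the requested range; the hand-written SSE keeps that product in single precision on every compiler so the generated stream stays bit-exact across 32- and 64-bit builds. A typical caller, for reference (seed and range are illustrative):

#include "opencv2/core/core.hpp"

int main()
{
    cv::RNG rng(12345);
    cv::Mat m(4, 4, CV_32F);
    rng.fill(m, cv::RNG::UNIFORM, cv::Scalar(0.), cv::Scalar(1.));   // reaches randf_32f for CV_32F data
    return 0;
}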

View File

@@ -834,7 +834,6 @@ float normL2Sqr_(const float* a, const float* b, int n)
}
else
#endif
//vz why do we need unroll here? no sse = no need to unroll
{
for( ; j <= n - 4; j += 4 )
{
@@ -875,7 +874,6 @@ float normL1_(const float* a, const float* b, int n)
}
else
#endif
//vz no need to unroll here - if no sse
{
for( ; j <= n - 4; j += 4 )
{
@@ -916,7 +914,6 @@ int normL1_(const uchar* a, const uchar* b, int n)
}
else
#endif
//vz why do we need unroll here? no sse = no unroll
{
for( ; j <= n - 4; j += 4 )
{
@@ -965,6 +962,34 @@ static const uchar popCountTable4[] =
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};
int normHamming(const uchar* a, int n)
{
int i = 0, result = 0;
#if CV_NEON
if (CPU_HAS_NEON_FEATURE)
{
uint32x4_t bits = vmovq_n_u32(0);
for (; i <= n - 16; i += 16) {
uint8x16_t A_vec = vld1q_u8 (a + i);
uint8x16_t bitsSet = vcntq_u8 (A_vec);
uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet);
uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8);
bits = vaddq_u32(bits, bitSet4);
}
uint64x2_t bitSet2 = vpaddlq_u32 (bits);
result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0);
result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2);
}
else
#endif
for( ; i <= n - 4; i += 4 )
result += popCountTable[a[i]] + popCountTable[a[i+1]] +
popCountTable[a[i+2]] + popCountTable[a[i+3]];
for( ; i < n; i++ )
result += popCountTable[a[i]];
return result;
}
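This single-array overload simply counts set bits with an 8-bit popcount table (or NEON vcntq_u8 when available). Illustrative call; if the one-argument overload is not declared in the headers you include, cv::norm with NORM_HAMMING (see below) gives the same count:

#include "opencv2/core/core.hpp"
#include <iostream>

int main()
{
    uchar desc[4] = { 0xFF, 0x0F, 0x01, 0x00 };
    std::cout << cv::normHamming(desc, 4) << std::endl;   // 8 + 4 + 1 + 0 = 13
    return 0;
}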
int normHamming(const uchar* a, const uchar* b, int n)
{
int i = 0, result = 0;
@@ -995,6 +1020,27 @@ int normHamming(const uchar* a, const uchar* b, int n)
return result;
}
int normHamming(const uchar* a, int n, int cellSize)
{
if( cellSize == 1 )
return normHamming(a, n);
const uchar* tab = 0;
if( cellSize == 2 )
tab = popCountTable2;
else if( cellSize == 4 )
tab = popCountTable4;
else
CV_Error( CV_StsBadSize, "bad cell size (not 1, 2 or 4) in normHamming" );
int i = 0, result = 0;
#if CV_ENABLE_UNROLLED
for( ; i <= n - 4; i += 4 )
result += tab[a[i]] + tab[a[i+1]] + tab[a[i+2]] + tab[a[i+3]];
#endif
for( ; i < n; i++ )
result += tab[a[i]];
return result;
}
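Here cellSize generalizes the count: with cellSize == 2 (the NORM_HAMMING2 case, meant for ORB-style descriptors built with WTA_K > 2) every 2-bit cell contributes 1 if any of its bits is set, via popCountTable2, and cellSize == 4 does the same for 4-bit cells. A worked byte, as a hedged example (same visibility caveat as above):

#include "opencv2/core/core.hpp"
#include <iostream>

int main()
{
    uchar b = 0x0D;                                       // 2-bit cells: 00 00 11 01
    std::cout << cv::normHamming(&b, 1)    << " "         // 3: plain popcount
              << cv::normHamming(&b, 1, 2) << std::endl;  // 2: two of the four 2-bit cells are nonzero
    return 0;
}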
int normHamming(const uchar* a, const uchar* b, int n, int cellSize)
{
if( cellSize == 1 )
@@ -1221,38 +1267,80 @@ double cv::norm( InputArray _src, int normType, InputArray _mask )
int depth = src.depth(), cn = src.channels();
normType &= 7;
CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 );
CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR ||
((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src.type() == CV_8U) );
if( depth == CV_32F && src.isContinuous() && mask.empty() )
if( src.isContinuous() && mask.empty() )
{
size_t len = src.total()*cn;
if( len == (size_t)(int)len )
{
const float* data = src.ptr<float>();
if( normType == NORM_L2 )
if( depth == CV_32F )
{
double result = 0;
GET_OPTIMIZED(normL2_32f)(data, 0, &result, (int)len, 1);
return std::sqrt(result);
const float* data = src.ptr<float>();
if( normType == NORM_L2 )
{
double result = 0;
GET_OPTIMIZED(normL2_32f)(data, 0, &result, (int)len, 1);
return std::sqrt(result);
}
if( normType == NORM_L2SQR )
{
double result = 0;
GET_OPTIMIZED(normL2_32f)(data, 0, &result, (int)len, 1);
return result;
}
if( normType == NORM_L1 )
{
double result = 0;
GET_OPTIMIZED(normL1_32f)(data, 0, &result, (int)len, 1);
return result;
}
if( normType == NORM_INF )
{
float result = 0;
GET_OPTIMIZED(normInf_32f)(data, 0, &result, (int)len, 1);
return result;
}
}
if( normType == NORM_L1 )
if( depth == CV_8U )
{
double result = 0;
GET_OPTIMIZED(normL1_32f)(data, 0, &result, (int)len, 1);
return result;
}
{
float result = 0;
GET_OPTIMIZED(normInf_32f)(data, 0, &result, (int)len, 1);
return result;
const uchar* data = src.ptr<uchar>();
if( normType == NORM_HAMMING )
return normHamming(data, (int)len);
if( normType == NORM_HAMMING2 )
return normHamming(data, (int)len, 2);
}
}
}
CV_Assert( mask.empty() || mask.type() == CV_8U );
if( normType == NORM_HAMMING || normType == NORM_HAMMING2 )
{
if( !mask.empty() )
{
Mat temp;
bitwise_and(src, mask, temp);
return norm(temp, normType);
}
int cellSize = normType == NORM_HAMMING ? 1 : 2;
const Mat* arrays[] = {&src, 0};
uchar* ptrs[1];
NAryMatIterator it(arrays, ptrs);
int total = (int)it.size;
int result = 0;
for( size_t i = 0; i < it.nplanes; i++, ++it )
result += normHamming(ptrs[0], total, cellSize);
return result;
}
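With this branch in place, cv::norm works directly on packed binary descriptors. A hedged example (the descriptor contents are made up):

#include "opencv2/core/core.hpp"
#include <iostream>

int main()
{
    cv::Mat desc(1, 32, CV_8U, cv::Scalar(0));       // a 256-bit descriptor, all zeros
    desc.at<uchar>(0, 0) = 0xF0;
    std::cout << cv::norm(desc, cv::NORM_HAMMING) << std::endl;   // prints 4
    return 0;
}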
NormFunc func = normTab[normType >> 1][depth];
CV_Assert( func != 0 );
@@ -1269,7 +1357,7 @@ double cv::norm( InputArray _src, int normType, InputArray _mask )
NAryMatIterator it(arrays, ptrs);
int j, total = (int)it.size, blockSize = total, intSumBlockSize = 0, count = 0;
bool blockSum = (normType == NORM_L1 && depth <= CV_16S) ||
(normType == NORM_L2 && depth <= CV_8S);
((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S);
int isum = 0;
int *ibuf = &result.i;
size_t esz = 0;
@@ -1328,38 +1416,72 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m
CV_Assert( src1.size == src2.size && src1.type() == src2.type() );
normType &= 7;
CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 );
CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR ||
((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src1.type() == CV_8U) );
if( src1.depth() == CV_32F && src1.isContinuous() && src2.isContinuous() && mask.empty() )
if( src1.isContinuous() && src2.isContinuous() && mask.empty() )
{
size_t len = src1.total()*src1.channels();
if( len == (size_t)(int)len )
{
const float* data1 = src1.ptr<float>();
const float* data2 = src2.ptr<float>();
if( normType == NORM_L2 )
if( src1.depth() == CV_32F )
{
double result = 0;
GET_OPTIMIZED(normDiffL2_32f)(data1, data2, 0, &result, (int)len, 1);
return std::sqrt(result);
}
if( normType == NORM_L1 )
{
double result = 0;
GET_OPTIMIZED(normDiffL1_32f)(data1, data2, 0, &result, (int)len, 1);
return result;
}
{
float result = 0;
GET_OPTIMIZED(normDiffInf_32f)(data1, data2, 0, &result, (int)len, 1);
return result;
const float* data1 = src1.ptr<float>();
const float* data2 = src2.ptr<float>();
if( normType == NORM_L2 )
{
double result = 0;
GET_OPTIMIZED(normDiffL2_32f)(data1, data2, 0, &result, (int)len, 1);
return std::sqrt(result);
}
if( normType == NORM_L2SQR )
{
double result = 0;
GET_OPTIMIZED(normDiffL2_32f)(data1, data2, 0, &result, (int)len, 1);
return result;
}
if( normType == NORM_L1 )
{
double result = 0;
GET_OPTIMIZED(normDiffL1_32f)(data1, data2, 0, &result, (int)len, 1);
return result;
}
if( normType == NORM_INF )
{
float result = 0;
GET_OPTIMIZED(normDiffInf_32f)(data1, data2, 0, &result, (int)len, 1);
return result;
}
}
}
}
CV_Assert( mask.empty() || mask.type() == CV_8U );
if( normType == NORM_HAMMING || normType == NORM_HAMMING2 )
{
if( !mask.empty() )
{
Mat temp;
bitwise_xor(src1, src2, temp);
bitwise_and(temp, mask, temp);
return norm(temp, normType);
}
int cellSize = normType == NORM_HAMMING ? 1 : 2;
const Mat* arrays[] = {&src1, &src2, 0};
uchar* ptrs[2];
NAryMatIterator it(arrays, ptrs);
int total = (int)it.size;
int result = 0;
for( size_t i = 0; i < it.nplanes; i++, ++it )
result += normHamming(ptrs[0], ptrs[1], total, cellSize);
return result;
}
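The two-array counterpart mirrors it, so the Hamming distance between two binary descriptors is a single call. Illustrative values again:

#include "opencv2/core/core.hpp"
#include <iostream>

int main()
{
    cv::Mat a(1, 32, CV_8U, cv::Scalar(0x0F));
    cv::Mat b(1, 32, CV_8U, cv::Scalar(0x03));
    // 0x0F ^ 0x03 = 0x0C -> 2 differing bits per byte, times 32 bytes
    std::cout << cv::norm(a, b, cv::NORM_HAMMING) << std::endl;   // prints 64
    return 0;
}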
NormDiffFunc func = normDiffTab[normType >> 1][depth];
CV_Assert( func != 0 );
@@ -1377,7 +1499,7 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m
NAryMatIterator it(arrays, ptrs);
int j, total = (int)it.size, blockSize = total, intSumBlockSize = 0, count = 0;
bool blockSum = (normType == NORM_L1 && depth <= CV_16S) ||
(normType == NORM_L2 && depth <= CV_8S);
((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S);
unsigned isum = 0;
unsigned *ibuf = &result.u;
size_t esz = 0;