Added IPP dot product functions. Only the 32f data type is supported for now (there are accuracy issues in the 8u/16s/16u/32s functions, which will be fixed in IPP 7.0.3).

2011-02-27 18:12:30 +00:00 · 2011-02-27 18:12:30 +00:00 · 4f83a06358
commit 4f83a06358
parent 17dc1e1340
1 changed files with 215 additions and 9 deletions
--- a/modules/core/src/matmul.cpp
+++ b/modules/core/src/matmul.cpp
@ -42,6 +42,10 @@

 #include "precomp.hpp"

+#ifdef HAVE_IPP
+#include "ippversion.h"
+#endif
+
 namespace cv
 {

@ -2629,11 +2633,183 @@ void mulTransposed( const Mat& src, Mat& dst, bool ata,
 *                                      Dot Product                                       *
 \****************************************************************************************/

+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+
+// Dot product of two 8-bit unsigned matrices computed through IPP
+// (ippiDotProd_8u64f_C1R / _C3R / _C4R).
+//
+// srcmat1, srcmat2 - input matrices.  The ROI and channel count are taken from
+//                    srcmat1 only, so the caller must guarantee that srcmat2
+//                    has the same size and type.
+// Returns the per-channel results stored in sum[] by the IPP call, added
+// together into a single value.  The IPP status code is not inspected.
+//
+// Only 1, 3 and 4 channels are supported; any other channel count is rejected,
+// because the switch has no branch for it and the summation loop would read
+// past the end of the 4-element sum[] buffer for more than 4 channels.
+static double ippDotProd8u(const Mat& srcmat1, const Mat& srcmat2)
+{
+    int      nchan  = srcmat1.channels();
+    Ipp64f   sum[4] = { 0.0 };
+    IppiSize roi    = { srcmat1.cols, srcmat1.rows };
+
+    CV_Assert( nchan == 1 || nchan == 3 || nchan == 4 );
+
+    switch(nchan)
+    {
+        case 1:
+            ippiDotProd_8u64f_C1R((const Ipp8u*)srcmat1.data, (int)srcmat1.step,
+                                  (const Ipp8u*)srcmat2.data, (int)srcmat2.step, roi, sum);
+            break;
+
+        case 3:
+            ippiDotProd_8u64f_C3R((const Ipp8u*)srcmat1.data, (int)srcmat1.step,
+                                  (const Ipp8u*)srcmat2.data, (int)srcmat2.step, roi, sum);
+            break;
+
+        case 4:
+            ippiDotProd_8u64f_C4R((const Ipp8u*)srcmat1.data, (int)srcmat1.step,
+                                  (const Ipp8u*)srcmat2.data, (int)srcmat2.step, roi, sum);
+            break;
+    }
+
+    // Fold the remaining channel results into sum[0].
+    for(int c = 1; c < nchan; c++)
+    {
+        sum[0] += sum[c];
+    }
+
+    return sum[0];
+} // ippDotProd8u()
+
+
+// Dot product of two 16-bit unsigned matrices computed through IPP
+// (ippiDotProd_16u64f_C1R / _C3R / _C4R).
+//
+// srcmat1, srcmat2 - input matrices.  The ROI and channel count are taken from
+//                    srcmat1 only, so the caller must guarantee that srcmat2
+//                    has the same size and type.
+// Returns the per-channel results stored in sum[] by the IPP call, added
+// together into a single value.  The IPP status code is not inspected.
+//
+// Only 1, 3 and 4 channels are supported; any other channel count is rejected,
+// because the switch has no branch for it and the summation loop would read
+// past the end of the 4-element sum[] buffer for more than 4 channels.
+static double ippDotProd16u(const Mat& srcmat1, const Mat& srcmat2)
+{
+    int      nchan  = srcmat1.channels();
+    Ipp64f   sum[4] = { 0.0 };
+    IppiSize roi    = { srcmat1.cols, srcmat1.rows };
+
+    CV_Assert( nchan == 1 || nchan == 3 || nchan == 4 );
+
+    switch(nchan)
+    {
+        case 1:
+            ippiDotProd_16u64f_C1R((const Ipp16u*)srcmat1.data, (int)srcmat1.step,
+                                   (const Ipp16u*)srcmat2.data, (int)srcmat2.step, roi, sum);
+            break;
+
+        case 3:
+            ippiDotProd_16u64f_C3R((const Ipp16u*)srcmat1.data, (int)srcmat1.step,
+                                   (const Ipp16u*)srcmat2.data, (int)srcmat2.step, roi, sum);
+            break;
+
+        case 4:
+            ippiDotProd_16u64f_C4R((const Ipp16u*)srcmat1.data, (int)srcmat1.step,
+                                   (const Ipp16u*)srcmat2.data, (int)srcmat2.step, roi, sum);
+            break;
+    }
+
+    // Fold the remaining channel results into sum[0].
+    for(int c = 1; c < nchan; c++)
+    {
+        sum[0] += sum[c];
+    }
+
+    return sum[0];
+} // ippDotProd16u()
+
+
+// Dot product of two 16-bit signed matrices computed through IPP
+// (ippiDotProd_16s64f_C1R / _C3R / _C4R).
+//
+// srcmat1, srcmat2 - input matrices.  The ROI and channel count are taken from
+//                    srcmat1 only, so the caller must guarantee that srcmat2
+//                    has the same size and type.
+// Returns the per-channel results stored in sum[] by the IPP call, added
+// together into a single value.  The IPP status code is not inspected.
+//
+// Only 1, 3 and 4 channels are supported; any other channel count is rejected,
+// because the switch has no branch for it and the summation loop would read
+// past the end of the 4-element sum[] buffer for more than 4 channels.
+static double ippDotProd16s(const Mat& srcmat1, const Mat& srcmat2)
+{
+    int      nchan  = srcmat1.channels();
+    Ipp64f   sum[4] = { 0.0 };
+    IppiSize roi    = { srcmat1.cols, srcmat1.rows };
+
+    CV_Assert( nchan == 1 || nchan == 3 || nchan == 4 );
+
+    switch(nchan)
+    {
+        case 1:
+            ippiDotProd_16s64f_C1R((const Ipp16s*)srcmat1.data, (int)srcmat1.step,
+                                   (const Ipp16s*)srcmat2.data, (int)srcmat2.step, roi, sum);
+            break;
+
+        case 3:
+            ippiDotProd_16s64f_C3R((const Ipp16s*)srcmat1.data, (int)srcmat1.step,
+                                   (const Ipp16s*)srcmat2.data, (int)srcmat2.step, roi, sum);
+            break;
+
+        case 4:
+            ippiDotProd_16s64f_C4R((const Ipp16s*)srcmat1.data, (int)srcmat1.step,
+                                   (const Ipp16s*)srcmat2.data, (int)srcmat2.step, roi, sum);
+            break;
+    }
+
+    // Fold the remaining channel results into sum[0].
+    for(int c = 1; c < nchan; c++)
+    {
+        sum[0] += sum[c];
+    }
+
+    return sum[0];
+} // ippDotProd16s()
+
+
+// Dot product of two 32-bit signed integer matrices computed through IPP
+// (ippiDotProd_32s64f_C1R / _C3R / _C4R).
+//
+// srcmat1, srcmat2 - input matrices.  The ROI and channel count are taken from
+//                    srcmat1 only, so the caller must guarantee that srcmat2
+//                    has the same size and type.
+// Returns the per-channel results stored in sum[] by the IPP call, added
+// together into a single value.  The IPP status code is not inspected.
+//
+// Only 1, 3 and 4 channels are supported; any other channel count is rejected,
+// because the switch has no branch for it and the summation loop would read
+// past the end of the 4-element sum[] buffer for more than 4 channels.
+static double ippDotProd32s(const Mat& srcmat1, const Mat& srcmat2)
+{
+    int      nchan  = srcmat1.channels();
+    Ipp64f   sum[4] = { 0.0 };
+    IppiSize roi    = { srcmat1.cols, srcmat1.rows };
+
+    CV_Assert( nchan == 1 || nchan == 3 || nchan == 4 );
+
+    switch(nchan)
+    {
+        case 1:
+            ippiDotProd_32s64f_C1R((const Ipp32s*)srcmat1.data, (int)srcmat1.step,
+                                   (const Ipp32s*)srcmat2.data, (int)srcmat2.step, roi, sum);
+            break;
+
+        case 3:
+            ippiDotProd_32s64f_C3R((const Ipp32s*)srcmat1.data, (int)srcmat1.step,
+                                   (const Ipp32s*)srcmat2.data, (int)srcmat2.step, roi, sum);
+            break;
+
+        case 4:
+            ippiDotProd_32s64f_C4R((const Ipp32s*)srcmat1.data, (int)srcmat1.step,
+                                   (const Ipp32s*)srcmat2.data, (int)srcmat2.step, roi, sum);
+            break;
+    }
+
+    // Fold the remaining channel results into sum[0].
+    for(int c = 1; c < nchan; c++)
+    {
+        sum[0] += sum[c];
+    }
+
+    return sum[0];
+} // ippDotProd32s()
+
+
+// Dot product of two 32-bit floating-point matrices computed through IPP
+// (ippiDotProd_32f64f_C1R / _C3R / _C4R, called with ippAlgHintAccurate).
+//
+// srcmat1, srcmat2 - input matrices.  The ROI and channel count are taken from
+//                    srcmat1 only, so the caller must guarantee that srcmat2
+//                    has the same size and type.
+// Returns the per-channel results stored in sum[] by the IPP call, added
+// together into a single value.  The IPP status code is not inspected.
+//
+// Only 1, 3 and 4 channels are supported; any other channel count is rejected,
+// because the switch has no branch for it and the summation loop would read
+// past the end of the 4-element sum[] buffer for more than 4 channels.
+static double ippDotProd32f(const Mat& srcmat1, const Mat& srcmat2)
+{
+    int      nchan  = srcmat1.channels();
+    Ipp64f   sum[4] = { 0.0 };
+    IppiSize roi    = { srcmat1.cols, srcmat1.rows };
+
+    CV_Assert( nchan == 1 || nchan == 3 || nchan == 4 );
+
+    switch(nchan)
+    {
+        case 1:
+            ippiDotProd_32f64f_C1R((const Ipp32f*)srcmat1.data, (int)srcmat1.step,
+                                   (const Ipp32f*)srcmat2.data, (int)srcmat2.step, roi, sum, ippAlgHintAccurate);
+            break;
+
+        case 3:
+            ippiDotProd_32f64f_C3R((const Ipp32f*)srcmat1.data, (int)srcmat1.step,
+                                   (const Ipp32f*)srcmat2.data, (int)srcmat2.step, roi, sum, ippAlgHintAccurate);
+            break;
+
+        case 4:
+            ippiDotProd_32f64f_C4R((const Ipp32f*)srcmat1.data, (int)srcmat1.step,
+                                   (const Ipp32f*)srcmat2.data, (int)srcmat2.step, roi, sum, ippAlgHintAccurate);
+            break;
+    }
+
+    // Fold the remaining channel results into sum[0].
+    for(int c = 1; c < nchan; c++)
+    {
+        sum[0] += sum[c];
+    }
+
+    return sum[0];
+} // ippDotProd32f()
+
+#endif
+
 template<typename T, typename WT, typename ST> static double
 dotprod_( const Mat& srcmat1, const Mat& srcmat2 )
 {
-    const T *src1 = (const T*)srcmat1.data, *src2 = (const T*)srcmat2.data;
-    size_t step1 = srcmat1.step/sizeof(src1[0]), step2 = srcmat2.step/sizeof(src2[0]);
+    const T* src1 = (const T*)srcmat1.data;
+    const T* src2 = (const T*)srcmat2.data;
+
+    size_t step1 = srcmat1.step/sizeof(src1[0]);
+    size_t step2 = srcmat2.step/sizeof(src2[0]);
+
    ST sum = 0;
    Size size = getContinuousSize( srcmat1, srcmat2, srcmat1.channels() );

@ -2642,6 +2818,7 @@ dotprod_( const Mat& srcmat1, const Mat& srcmat2 )
        WT t = 0;
        for( ; size.height--; src1 += step1, src2 += step2 )
            t += (WT)src1[0]*src2[0];
+
        sum += t;
    }
    else
@ -2652,17 +2829,19 @@ dotprod_( const Mat& srcmat1, const Mat& srcmat2 )
            WT t = 0;
            for( i = 0; i <= size.width - 4; i += 4 )
            {
-                sum += (WT)src1[i]*src2[i] +
-                    (WT)src1[i+1]*src2[i+1] +
-                    (WT)src1[i+2]*src2[i+2] +
-                    (WT)src1[i+3]*src2[i+3];
+                sum += (WT)src1[i  ]*src2[i  ] +
+                       (WT)src1[i+1]*src2[i+1] +
+                       (WT)src1[i+2]*src2[i+2] +
+                       (WT)src1[i+3]*src2[i+3];
            }

            for( ; i < size.width; i++ )
                t += (WT)src1[i]*src2[i];
+
            sum += t;
        }
    }
+
    return (double)sum;
 }

@ -2670,16 +2849,43 @@ typedef double (*DotProductFunc)(const Mat& src1, const Mat& src2);

 double Mat::dot(const Mat& mat) const
 {
-    static DotProductFunc tab[] = {
-        dotprod_<uchar, int, int64>, 0,
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+    static DotProductFunc ipptab[] =
+    {
+        dotprod_<uchar, int, int64>,
+        0,
+        dotprod_<ushort, double, double>,
+        dotprod_<short, double, double>,
+        dotprod_<int, double, double>,
+        ippDotProd32f,
+        dotprod_<double, double, double>,
+        0
+    };
+#endif
+
+    static DotProductFunc tab[] =
+    {
+        dotprod_<uchar, int, int64>,
+        0,
        dotprod_<ushort, double, double>,
        dotprod_<short, double, double>,
        dotprod_<int, double, double>,
        dotprod_<float, double, double>,
-        dotprod_<double, double, double>, 0 };
+        dotprod_<double, double, double>,
+        0
+    };

    DotProductFunc func = tab[depth()];
+
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+    if((*this).channels() != 2)
+    {
+        func = ipptab[depth()];
+    }
+#endif
+
    CV_Assert( mat.type() == type() && mat.size() == size() && func != 0 );
+
    return func( *this, mat );
 }