Added SharedMatrix

2014-03-27 17:40:50 +04:00
parent fa2d79a15b
commit 037ac27ec8
8 changed files with 198 additions and 77 deletions
--- a/modules/core/doc/ipp_async_converters.rst
+++ b/modules/core/doc/ipp_async_converters.rst
@@ -6,21 +6,31 @@ Intel® IPP Asynchronous C/C++ Converters
 General Information
 -------------------

-This section describes conversion between OpenCV and `Intel® IPP Asynchronous C/C++ <http://software.intel.com/en-us/intel-ipp-preview>`_ library. 
+This section describes conversion between OpenCV and `Intel® IPP Asynchronous C/C++ <http://software.intel.com/en-us/intel-ipp-preview>`_ library.
 `Getting Started Guide <http://registrationcenter.intel.com/irc_nas/3727/ipp_async_get_started.htm>`_ help you to install the library, configure header and library build paths.

 hpp::getHpp
 -----------
 Create ``hppiMatrix`` from ``Mat``.

-.. ocv:function:: Ptr<hppiMatrix> hpp::getHpp(const Mat& src)
+.. ocv:function:: hppiMatrix* hpp::getHpp(const Mat& src, hppAccel accel)

    :param src: input matrix.
+    :param accel: accelerator instance. Supported types:
+
+            * **HPP_ACCEL_TYPE_CPU** - accelerated by optimized CPU instructions.
+
+            * **HPP_ACCEL_TYPE_GPU** - accelerated by GPU programmable units or fixed-function accelerators.
+
+            * **HPP_ACCEL_TYPE_ANY** - any acceleration or no acceleration available.
+
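+This function allocates and initializes an ``hppiMatrix`` that has the same size and type as the input matrix and returns it as an ``hppiMatrix*``. The returned matrix is not reference-counted; release it with ``hppiFreeMatrix`` when it is no longer needed.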
+
+To use zero-copy on the GPU, the matrix data must be 4KB-aligned. For details, see `hppiCreateSharedMatrix <http://software.intel.com/ru-ru/node/501697>`_.

-This function allocates and initializes the ``hppiMatrix`` that has the same size and type as input matrix, returns the ``Ptr<hppiMatrix>``.
 Supports ``CV_8U``, ``CV_16U``, ``CV_16S``, ``CV_32S``, ``CV_32F``, ``CV_64F``.

-.. note:: The ``hppiMatrix`` pointer to the image buffer in system memory refers to the ``src.data``. Control the lifetime of the matrix and don't change its data, if there is no special need. 
+.. note:: The ``hppiMatrix`` does not copy the image: its buffer pointer refers to ``src.data`` in system memory. Keep ``src`` alive for as long as the ``hppiMatrix`` is in use, and do not modify its data unless you specifically need to.
 .. seealso:: :ref:`howToUseIPPAconversion`, :ocv:func:`hpp::getMat`


@@ -32,11 +42,11 @@ Create ``Mat`` from ``hppiMatrix``.

    :param src: input hppiMatrix.

-    :param accel: accelerator instance.
+    :param accel: accelerator instance (see :ocv:func:`hpp::getHpp` for the list of acceleration framework types).

    :param cn: number of channels.

-This function allocates and initializes the ``Mat`` that has the same size and type as input matrix. 
+This function allocates and initializes the ``Mat`` that has the same size and type as input matrix.
 Supports ``CV_8U``, ``CV_16U``, ``CV_16S``, ``CV_32S``, ``CV_32F``, ``CV_64F``.

 .. seealso:: :ref:`howToUseIPPAconversion`, :ocv:func:`hpp::copyHppToMat`, :ocv:func:`hpp::getHpp`.
@@ -49,14 +59,14 @@ Convert ``hppiMatrix`` to ``Mat``.
 .. ocv:function:: void hpp::copyHppToMat(hppiMatrix* src, Mat& dst, hppAccel accel, int cn)

    :param src: input hppiMatrix.
-    
+
    :param dst: output matrix.

-    :param accel: accelerator instance.
+    :param accel: accelerator instance (see :ocv:func:`hpp::getHpp` for the list of acceleration framework types).

    :param cn: number of channels.

-This function allocates and initializes new matrix (if needed) that has the same size and type as input matrix. 
+This function allocates and initializes new matrix (if needed) that has the same size and type as input matrix.
 Supports ``CV_8U``, ``CV_16U``, ``CV_16S``, ``CV_32S``, ``CV_32F``, ``CV_64F``.

 .. seealso:: :ref:`howToUseIPPAconversion`, :ocv:func:`hpp::getMat`, :ocv:func:`hpp::getHpp`.
--- a/modules/core/include/opencv2/core/ippasync.hpp
+++ b/modules/core/include/opencv2/core/ippasync.hpp
@@ -1,16 +1,14 @@
 #ifndef __OPENCV_CORE_IPPASYNC_HPP__
 #define __OPENCV_CORE_IPPASYNC_HPP__

+#ifdef HAVE_IPP_A
+
 #include "opencv2/core.hpp"
 #include <ipp_async_op.h>
 #include <ipp_async_accel.h>

 namespace cv
 {
-    void DefaultDeleter<hppiMatrix>::operator () (hppiMatrix* p) const
-    {
-        hppiFreeMatrix(p);
-    }

 namespace hpp
 {
@@ -25,7 +23,7 @@ namespace hpp
                     depth == CV_32F ? HPP_DATA_TYPE_32F :
                     depth == CV_64F ? HPP_DATA_TYPE_64F : -1;
        CV_Assert( hppType >= 0 );
-        return hppType; 
+        return hppType;
    }

    //convert hppDataType to OpenCV data type
@@ -47,7 +45,8 @@ namespace hpp
        hpp32u width, height;
        hppStatus sts;

-        CV_Assert(src!=NULL);
+        if (src == NULL)
+            return dst.release();

        sts = hppiInquireMatrix(src, &type, &width, &height);

@@ -76,17 +75,31 @@ namespace hpp
        return dst;
    }

-     //create hppiMatrix from cv::Mat
-    inline Ptr<hppiMatrix> getHpp(const Mat& src)
+    //create hppiMatrix from cv::Mat (wraps src.data without copying; caller releases the result with hppiFreeMatrix)
+    inline hppiMatrix* getHpp(const Mat& src, hppAccel accel)
    {
        int htype = toHppType(src.type());
        int cn = src.channels();

        CV_Assert(src.data);
-        hppiMatrix *dst = hppiCreateMatrix(htype, src.cols*cn, src.rows, src.data, (hpp32s)(src.step));
+        hppAccelType accelType = hppQueryAccelType(accel);

-        return Ptr<hppiMatrix>(dst);
+        if (accelType!=HPP_ACCEL_TYPE_CPU) // the shared-matrix path is only attempted for non-CPU accelerators
+        {
+            hpp32u pitch = 0, size = 0; // zero-initialized so an output the query leaves unwritten cannot pass the check below
+            hppQueryMatrixAllocParams(accel, src.cols*cn, src.rows, htype, &pitch, &size);
+            if (pitch!=0 && size!=0)
+                if ((size_t)(src.data)%4096==0 && pitch==(hpp32u)(src.step)) // size_t, not int: an int cast truncates the pointer on 64-bit targets
+                {
+                    return hppiCreateSharedMatrix(htype, src.cols*cn, src.rows, src.data, pitch, size);
+                }
+        }
+
+        return hppiCreateMatrix(htype, src.cols*cn, src.rows, src.data, (hpp32s)(src.step)); // fallback: regular matrix over src.data
    }
+
 }}

+#endif
+
 #endif
--- a/modules/core/test/test_ippasync.cpp
+++ b/modules/core/test/test_ippasync.cpp
@@ -1,6 +1,7 @@
 #include "test_precomp.hpp"
 #include "opencv2/ts/ocl_test.hpp"

+#ifdef HAVE_IPP_A
 #include "opencv2/core/ippasync.hpp"

 using namespace cv;
@@ -18,7 +19,7 @@ PARAM_TEST_CASE(IPPAsync, MatDepth, Channels, hppAccelType)
    hppAccelType accelType;

    Mat matrix, result;
-    Ptr<hppiMatrix> hppMat;
+    hppiMatrix * hppMat;
    hppAccel accel;
    hppiVirtualMatrix * virtMatrix;
    hppStatus sts;
@@ -47,17 +48,16 @@ PARAM_TEST_CASE(IPPAsync, MatDepth, Channels, hppAccelType)

 TEST_P(IPPAsync, accuracy)
 {
-    if (depth==CV_32S || depth==CV_64F)
-        return;
-    
    sts = hppCreateInstance(accelType, 0, &accel);
+    if (sts!=HPP_STATUS_NO_ERROR) printf("hppStatus= %d\n",sts);
    CV_Assert(sts==HPP_STATUS_NO_ERROR);
+
    virtMatrix = hppiCreateVirtualMatrices(accel, 2);

    for (int j = 0; j < test_loop_times; j++)
    {
        generateTestData();
-        hppMat = hpp::getHpp(matrix);
+        hppMat = hpp::getHpp(matrix,accel);

        hppScalar a = 3;

@@ -68,10 +68,13 @@ TEST_P(IPPAsync, accuracy)

        sts = hppWait(accel, HPP_TIME_OUT_INFINITE);
        CV_Assert(sts==HPP_STATUS_NO_ERROR);
-        
+
        result = hpp::getMat(virtMatrix[1], accel, cn);

        Near(5.0e-6);
+
+        sts =  hppiFreeMatrix(hppMat);
+        CV_Assert(sts==HPP_STATUS_NO_ERROR);
    }

    sts = hppiDeleteVirtualMatrices(accel, virtMatrix);
@@ -80,26 +83,82 @@ TEST_P(IPPAsync, accuracy)
    CV_Assert(sts==HPP_STATUS_NO_ERROR);
 }

-TEST_P(IPPAsync, conversion)
+PARAM_TEST_CASE(IPPAsyncShared, Channels, hppAccelType) // fixture: 8-bit matrices laid out per hppQueryMatrixAllocParams, fed to hpp::getHpp
+{
+    int cn;
+    int type;
+    hppAccelType accelType;
+
+    Mat matrix, result;
+    hppiMatrix* hppMat;
+    hppAccel accel;
+    hppiVirtualMatrix * virtMatrix;
+    hppStatus sts;
+
+    virtual void SetUp()
+    {
+        cn = GET_PARAM(0);
+        accelType = GET_PARAM(1);
+        type=CV_MAKE_TYPE(CV_8U, GET_PARAM(0));
+    }
+
+    virtual void generateTestData() // fills `matrix`; requires `accel` to be created already
+    {
+        Size matrix_Size = randomSize(2, 100);
+        hpp32u pitch = 0, size = 0; // zero-initialized: sts is not checked here, so unwritten outputs must select the fallback
+        const int upValue = 100;
+
+        sts = hppQueryMatrixAllocParams(accel, (hpp32u)(matrix_Size.width*cn), (hpp32u)matrix_Size.height, HPP_DATA_TYPE_8U, &pitch, &size);
+
+        if (pitch!=0 && size!=0)
+        {
+            uchar *pData = (uchar*)_aligned_malloc(size, 4096); // NOTE(review): no matching _aligned_free is visible here - confirm who releases this buffer
+
+            for (int j=0; j<matrix_Size.height; j++)
+                for(int i=0; i<matrix_Size.width*cn; i++)
+                    pData[i+j*pitch] = rand()%upValue;
+
+            matrix = Mat(matrix_Size.height, matrix_Size.width, type, pData, pitch);
+        }
+        else // fallback only: previously this assignment always ran and discarded the aligned matrix built above
+            matrix = randomMat(matrix_Size, type, 0, upValue);
+    }
+
+    void Near(double threshold = 0.0)
+    {
+        EXPECT_MAT_NEAR(matrix, result, threshold);
+    }
+};
+
+TEST_P(IPPAsyncShared, accuracy)
 {
    sts = hppCreateInstance(accelType, 0, &accel);
+    if (sts!=HPP_STATUS_NO_ERROR) printf("hppStatus= %d\n",sts);
    CV_Assert(sts==HPP_STATUS_NO_ERROR);
-    virtMatrix = hppiCreateVirtualMatrices(accel, 1);
+
+    virtMatrix = hppiCreateVirtualMatrices(accel, 2);

    for (int j = 0; j < test_loop_times; j++)
    {
        generateTestData();
-        hppMat = hpp::getHpp(matrix);
+        hppMat = hpp::getHpp(matrix,accel);

-        sts = hppiCopy (accel, hppMat, virtMatrix[0]);
+        hppScalar a = 3;
+
+        sts = hppiAddC(accel, hppMat, a, 0, virtMatrix[0]);
+        CV_Assert(sts==HPP_STATUS_NO_ERROR);
+        sts = hppiSubC(accel, virtMatrix[0], a, 0, virtMatrix[1]);
        CV_Assert(sts==HPP_STATUS_NO_ERROR);

        sts = hppWait(accel, HPP_TIME_OUT_INFINITE);
        CV_Assert(sts==HPP_STATUS_NO_ERROR);
-        
-        result = hpp::getMat(virtMatrix[0], accel, cn);

-        Near();
+        result = hpp::getMat(virtMatrix[1], accel, cn);
+
+        Near(0);
+
+        sts =  hppiFreeMatrix(hppMat);
+        CV_Assert(sts==HPP_STATUS_NO_ERROR);
    }

    sts = hppiDeleteVirtualMatrices(accel, virtMatrix);
@@ -108,9 +167,13 @@ TEST_P(IPPAsync, conversion)
    CV_Assert(sts==HPP_STATUS_NO_ERROR);
 }

-INSTANTIATE_TEST_CASE_P(IppATest, IPPAsync, Combine(Values(CV_8U, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F),
+INSTANTIATE_TEST_CASE_P(IppATest, IPPAsyncShared, Combine(Values(1, 2, 3, 4),
+                                                    Values( HPP_ACCEL_TYPE_CPU, HPP_ACCEL_TYPE_GPU)));
+
+INSTANTIATE_TEST_CASE_P(IppATest, IPPAsync, Combine(Values(CV_8U, CV_16U, CV_16S, CV_32F),
                                                   Values(1, 2, 3, 4),
                                                   Values( HPP_ACCEL_TYPE_CPU, HPP_ACCEL_TYPE_GPU)));

 }
-}
+}
+#endif