Added SharedMatrix

This commit is contained in:
Elena Gvozdeva
2014-03-27 17:40:50 +04:00
parent fa2d79a15b
commit 037ac27ec8
8 changed files with 198 additions and 77 deletions

View File

@@ -6,21 +6,31 @@ Intel® IPP Asynchronous C/C++ Converters
General Information
-------------------
This section describes conversion between OpenCV and `Intel® IPP Asynchronous C/C++ <http://software.intel.com/en-us/intel-ipp-preview>`_ library.
This section describes conversion between OpenCV and `Intel® IPP Asynchronous C/C++ <http://software.intel.com/en-us/intel-ipp-preview>`_ library.
`Getting Started Guide <http://registrationcenter.intel.com/irc_nas/3727/ipp_async_get_started.htm>`_ help you to install the library, configure header and library build paths.
hpp::getHpp
-----------
Create ``hppiMatrix`` from ``Mat``.
.. ocv:function:: Ptr<hppiMatrix> hpp::getHpp(const Mat& src)
.. ocv:function:: hppiMatrix* hpp::getHpp(const Mat& src, hppAccel accel)
:param src: input matrix.
:param accel: accelerator instance. Supports type:
* **HPP_ACCEL_TYPE_CPU** - accelerated by optimized CPU instructions.
* **HPP_ACCEL_TYPE_GPU** - accelerated by GPU programmable units or fixed-function accelerators.
* **HPP_ACCEL_TYPE_ANY** - any acceleration or no acceleration available.
This function allocates and initializes the ``hppiMatrix`` that has the same size and type as input matrix, returns the ``hppiMatrix*``.
If you want to use zero-copy for GPU you should to have 4KB aligned matrix data. See details `hppiCreateSharedMatrix <http://software.intel.com/ru-ru/node/501697>`_.
This function allocates and initializes the ``hppiMatrix`` that has the same size and type as input matrix, returns the ``Ptr<hppiMatrix>``.
Supports ``CV_8U``, ``CV_16U``, ``CV_16S``, ``CV_32S``, ``CV_32F``, ``CV_64F``.
.. note:: The ``hppiMatrix`` pointer to the image buffer in system memory refers to the ``src.data``. Control the lifetime of the matrix and don't change its data, if there is no special need.
.. note:: The ``hppiMatrix`` pointer to the image buffer in system memory refers to the ``src.data``. Control the lifetime of the matrix and don't change its data, if there is no special need.
.. seealso:: :ref:`howToUseIPPAconversion`, :ocv:func:`hpp::getMat`
@@ -32,11 +42,11 @@ Create ``Mat`` from ``hppiMatrix``.
:param src: input hppiMatrix.
:param accel: accelerator instance.
:param accel: accelerator instance (see :ocv:func:`hpp::getHpp` for the list of acceleration framework types).
:param cn: number of channels.
This function allocates and initializes the ``Mat`` that has the same size and type as input matrix.
This function allocates and initializes the ``Mat`` that has the same size and type as input matrix.
Supports ``CV_8U``, ``CV_16U``, ``CV_16S``, ``CV_32S``, ``CV_32F``, ``CV_64F``.
.. seealso:: :ref:`howToUseIPPAconversion`, :ocv:func:`hpp::copyHppToMat`, :ocv:func:`hpp::getHpp`.
@@ -49,14 +59,14 @@ Convert ``hppiMatrix`` to ``Mat``.
.. ocv:function:: void hpp::copyHppToMat(hppiMatrix* src, Mat& dst, hppAccel accel, int cn)
:param src: input hppiMatrix.
:param dst: output matrix.
:param accel: accelerator instance.
:param accel: accelerator instance (see :ocv:func:`hpp::getHpp` for the list of acceleration framework types).
:param cn: number of channels.
This function allocates and initializes new matrix (if needed) that has the same size and type as input matrix.
This function allocates and initializes new matrix (if needed) that has the same size and type as input matrix.
Supports ``CV_8U``, ``CV_16U``, ``CV_16S``, ``CV_32S``, ``CV_32F``, ``CV_64F``.
.. seealso:: :ref:`howToUseIPPAconversion`, :ocv:func:`hpp::getMat`, :ocv:func:`hpp::getHpp`.

View File

@@ -1,16 +1,14 @@
#ifndef __OPENCV_CORE_IPPASYNC_HPP__
#define __OPENCV_CORE_IPPASYNC_HPP__
#ifdef HAVE_IPP_A
#include "opencv2/core.hpp"
#include <ipp_async_op.h>
#include <ipp_async_accel.h>
namespace cv
{
void DefaultDeleter<hppiMatrix>::operator () (hppiMatrix* p) const
{
hppiFreeMatrix(p);
}
namespace hpp
{
@@ -25,7 +23,7 @@ namespace hpp
depth == CV_32F ? HPP_DATA_TYPE_32F :
depth == CV_64F ? HPP_DATA_TYPE_64F : -1;
CV_Assert( hppType >= 0 );
return hppType;
return hppType;
}
//convert hppDataType to OpenCV data type
@@ -47,7 +45,8 @@ namespace hpp
hpp32u width, height;
hppStatus sts;
CV_Assert(src!=NULL);
if (src == NULL)
return dst.release();
sts = hppiInquireMatrix(src, &type, &width, &height);
@@ -76,17 +75,31 @@ namespace hpp
return dst;
}
//create hppiMatrix from cv::Mat
inline Ptr<hppiMatrix> getHpp(const Mat& src)
//create hppiMatrix from cv::Mat
inline hppiMatrix* getHpp(const Mat& src, hppAccel accel)
{
int htype = toHppType(src.type());
int cn = src.channels();
CV_Assert(src.data);
hppiMatrix *dst = hppiCreateMatrix(htype, src.cols*cn, src.rows, src.data, (hpp32s)(src.step));
hppAccelType accelType = hppQueryAccelType(accel);
return Ptr<hppiMatrix>(dst);
if (accelType!=HPP_ACCEL_TYPE_CPU)
{
hpp32u pitch, size;
hppQueryMatrixAllocParams(accel, src.cols*cn, src.rows, htype, &pitch, &size);
if (pitch!=0 && size!=0)
if ((int)(src.data)%4096==0 && pitch==(hpp32u)(src.step))
{
return hppiCreateSharedMatrix(htype, src.cols*cn, src.rows, src.data, pitch, size);
}
}
return hppiCreateMatrix(htype, src.cols*cn, src.rows, src.data, (hpp32s)(src.step));;
}
}}
#endif
#endif

View File

@@ -1,6 +1,7 @@
#include "test_precomp.hpp"
#include "opencv2/ts/ocl_test.hpp"
#ifdef HAVE_IPP_A
#include "opencv2/core/ippasync.hpp"
using namespace cv;
@@ -18,7 +19,7 @@ PARAM_TEST_CASE(IPPAsync, MatDepth, Channels, hppAccelType)
hppAccelType accelType;
Mat matrix, result;
Ptr<hppiMatrix> hppMat;
hppiMatrix * hppMat;
hppAccel accel;
hppiVirtualMatrix * virtMatrix;
hppStatus sts;
@@ -47,17 +48,16 @@ PARAM_TEST_CASE(IPPAsync, MatDepth, Channels, hppAccelType)
TEST_P(IPPAsync, accuracy)
{
if (depth==CV_32S || depth==CV_64F)
return;
sts = hppCreateInstance(accelType, 0, &accel);
if (sts!=HPP_STATUS_NO_ERROR) printf("hppStatus= %d\n",sts);
CV_Assert(sts==HPP_STATUS_NO_ERROR);
virtMatrix = hppiCreateVirtualMatrices(accel, 2);
for (int j = 0; j < test_loop_times; j++)
{
generateTestData();
hppMat = hpp::getHpp(matrix);
hppMat = hpp::getHpp(matrix,accel);
hppScalar a = 3;
@@ -68,10 +68,13 @@ TEST_P(IPPAsync, accuracy)
sts = hppWait(accel, HPP_TIME_OUT_INFINITE);
CV_Assert(sts==HPP_STATUS_NO_ERROR);
result = hpp::getMat(virtMatrix[1], accel, cn);
Near(5.0e-6);
sts = hppiFreeMatrix(hppMat);
CV_Assert(sts==HPP_STATUS_NO_ERROR);
}
sts = hppiDeleteVirtualMatrices(accel, virtMatrix);
@@ -80,26 +83,82 @@ TEST_P(IPPAsync, accuracy)
CV_Assert(sts==HPP_STATUS_NO_ERROR);
}
TEST_P(IPPAsync, conversion)
PARAM_TEST_CASE(IPPAsyncShared, Channels, hppAccelType)
{
int cn;
int type;
hppAccelType accelType;
Mat matrix, result;
hppiMatrix* hppMat;
hppAccel accel;
hppiVirtualMatrix * virtMatrix;
hppStatus sts;
virtual void SetUp()
{
cn = GET_PARAM(0);
accelType = GET_PARAM(1);
type=CV_MAKE_TYPE(CV_8U, GET_PARAM(0));
}
virtual void generateTestData()
{
Size matrix_Size = randomSize(2, 100);
hpp32u pitch, size;
const int upValue = 100;
sts = hppQueryMatrixAllocParams(accel, (hpp32u)(matrix_Size.width*cn), (hpp32u)matrix_Size.height, HPP_DATA_TYPE_8U, &pitch, &size);
if (pitch!=0 && size!=0)
{
uchar *pData = (uchar*)_aligned_malloc(size, 4096);
for (int j=0; j<matrix_Size.height; j++)
for(int i=0; i<matrix_Size.width*cn; i++)
pData[i+j*pitch] = rand()%upValue;
matrix = Mat(matrix_Size.height, matrix_Size.width, type, pData, pitch);
}
matrix = randomMat(matrix_Size, type, 0, upValue);
}
void Near(double threshold = 0.0)
{
EXPECT_MAT_NEAR(matrix, result, threshold);
}
};
TEST_P(IPPAsyncShared, accuracy)
{
sts = hppCreateInstance(accelType, 0, &accel);
if (sts!=HPP_STATUS_NO_ERROR) printf("hppStatus= %d\n",sts);
CV_Assert(sts==HPP_STATUS_NO_ERROR);
virtMatrix = hppiCreateVirtualMatrices(accel, 1);
virtMatrix = hppiCreateVirtualMatrices(accel, 2);
for (int j = 0; j < test_loop_times; j++)
{
generateTestData();
hppMat = hpp::getHpp(matrix);
hppMat = hpp::getHpp(matrix,accel);
sts = hppiCopy (accel, hppMat, virtMatrix[0]);
hppScalar a = 3;
sts = hppiAddC(accel, hppMat, a, 0, virtMatrix[0]);
CV_Assert(sts==HPP_STATUS_NO_ERROR);
sts = hppiSubC(accel, virtMatrix[0], a, 0, virtMatrix[1]);
CV_Assert(sts==HPP_STATUS_NO_ERROR);
sts = hppWait(accel, HPP_TIME_OUT_INFINITE);
CV_Assert(sts==HPP_STATUS_NO_ERROR);
result = hpp::getMat(virtMatrix[0], accel, cn);
Near();
result = hpp::getMat(virtMatrix[1], accel, cn);
Near(0);
sts = hppiFreeMatrix(hppMat);
CV_Assert(sts==HPP_STATUS_NO_ERROR);
}
sts = hppiDeleteVirtualMatrices(accel, virtMatrix);
@@ -108,9 +167,13 @@ TEST_P(IPPAsync, conversion)
CV_Assert(sts==HPP_STATUS_NO_ERROR);
}
INSTANTIATE_TEST_CASE_P(IppATest, IPPAsync, Combine(Values(CV_8U, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F),
INSTANTIATE_TEST_CASE_P(IppATest, IPPAsyncShared, Combine(Values(1, 2, 3, 4),
Values( HPP_ACCEL_TYPE_CPU, HPP_ACCEL_TYPE_GPU)));
INSTANTIATE_TEST_CASE_P(IppATest, IPPAsync, Combine(Values(CV_8U, CV_16U, CV_16S, CV_32F),
Values(1, 2, 3, 4),
Values( HPP_ACCEL_TYPE_CPU, HPP_ACCEL_TYPE_GPU)));
}
}
}
#endif