Merge pull request #5493 from lupustr3:pvlasov/ipp9_fixes
This commit is contained in:
commit
466a98f7c3
@ -241,6 +241,26 @@ static inline IppDataType ippiGetDataType(int depth)
|
||||
depth == CV_64F ? ipp64f : (IppDataType)-1;
|
||||
}
|
||||
|
||||
// IPP temporary buffer helper
|
||||
template<typename T>
|
||||
class IppAutoBuffer
|
||||
{
|
||||
public:
|
||||
IppAutoBuffer() { m_pBuffer = NULL; }
|
||||
IppAutoBuffer(int size) { Alloc(size); }
|
||||
~IppAutoBuffer() { Release(); }
|
||||
T* Alloc(int size) { m_pBuffer = (T*)ippMalloc(size); return m_pBuffer; }
|
||||
void Release() { if(m_pBuffer) ippFree(m_pBuffer); }
|
||||
inline operator T* () { return (T*)m_pBuffer;}
|
||||
inline operator const T* () const { return (const T*)m_pBuffer;}
|
||||
private:
|
||||
// Disable copy operations
|
||||
IppAutoBuffer(IppAutoBuffer &) {};
|
||||
IppAutoBuffer& operator =(const IppAutoBuffer &) {return *this;};
|
||||
|
||||
T* m_pBuffer;
|
||||
};
|
||||
|
||||
#else
|
||||
#define IPP_VERSION_X100 0
|
||||
#endif
|
||||
|
@ -3131,7 +3131,7 @@ static double dotProd_16u(const ushort* src1, const ushort* src2, int len)
|
||||
|
||||
static double dotProd_16s(const short* src1, const short* src2, int len)
|
||||
{
|
||||
#if (ARITHM_USE_IPP == 1)
|
||||
#if (ARITHM_USE_IPP == 1) && (IPP_VERSION_X100 != 900) // bug in IPP 9.0.0
|
||||
CV_IPP_CHECK()
|
||||
{
|
||||
double r = 0;
|
||||
|
@ -1318,6 +1318,12 @@ public:
|
||||
ippFeatures = ippCPUID_SSE;
|
||||
else if(env == "sse2")
|
||||
ippFeatures = ippCPUID_SSE2;
|
||||
else if(env == "sse3")
|
||||
ippFeatures = ippCPUID_SSE3;
|
||||
else if(env == "ssse3")
|
||||
ippFeatures = ippCPUID_SSSE3;
|
||||
else if(env == "sse41")
|
||||
ippFeatures = ippCPUID_SSE41;
|
||||
else if(env == "sse42")
|
||||
ippFeatures = ippCPUID_SSE42;
|
||||
else if(env == "avx")
|
||||
|
@ -4579,7 +4579,11 @@ static bool ipp_filter2D( InputArray _src, OutputArray _dst, int ddepth,
|
||||
int stype = src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype),
|
||||
ktype = kernel.type(), kdepth = CV_MAT_DEPTH(ktype);
|
||||
bool isolated = (borderType & BORDER_ISOLATED) != 0;
|
||||
#if IPP_VERSION_X100 >= 900
|
||||
Point ippAnchor((kernel.cols-1)/2, (kernel.rows-1)/2);
|
||||
#else
|
||||
Point ippAnchor(kernel.cols >> 1, kernel.rows >> 1);
|
||||
#endif
|
||||
int borderTypeNI = borderType & ~BORDER_ISOLATED;
|
||||
IppiBorderType ippBorderType = ippiGetBorderType(borderTypeNI);
|
||||
|
||||
@ -4610,24 +4614,64 @@ static bool ipp_filter2D( InputArray _src, OutputArray _dst, int ddepth,
|
||||
|
||||
if ((status = ippiFilterBorderGetSize(kernelSize, dstRoiSize, dataType, kernelType, cn, &specSize, &bufsize)) >= 0)
|
||||
{
|
||||
IppiFilterBorderSpec * spec = (IppiFilterBorderSpec *)ippMalloc(specSize);
|
||||
Ipp8u * buffer = ippsMalloc_8u(bufsize);
|
||||
IppAutoBuffer<IppiFilterBorderSpec> spec(specSize);
|
||||
IppAutoBuffer<Ipp8u> buffer(bufsize);
|
||||
Ipp32f borderValue[4] = { 0, 0, 0, 0 };
|
||||
|
||||
Mat reversedKernel;
|
||||
flip(kernel, reversedKernel, -1);
|
||||
|
||||
if ((kdepth == CV_32F && (status = ippiFilterBorderInit_32f((const Ipp32f *)reversedKernel.data, kernelSize,
|
||||
dataType, cn, ippRndFinancial, spec)) >= 0 ) ||
|
||||
(kdepth == CV_16S && (status = ippiFilterBorderInit_16s((const Ipp16s *)reversedKernel.data,
|
||||
kernelSize, 0, dataType, cn, ippRndFinancial, spec)) >= 0))
|
||||
if(kdepth == CV_32F)
|
||||
{
|
||||
status = ippFunc(src.data, (int)src.step, dst.data, (int)dst.step, dstRoiSize,
|
||||
ippBorderType, borderValue, spec, buffer);
|
||||
}
|
||||
Ipp32f *pKerBuffer = (Ipp32f*)kernel.data;
|
||||
IppAutoBuffer<Ipp32f> kerTmp;
|
||||
int kerStep = sizeof(Ipp32f)*kernelSize.width;
|
||||
#if IPP_VERSION_X100 >= 900
|
||||
if(kernel.step != kerStep)
|
||||
{
|
||||
kerTmp.Alloc(kerStep*kernelSize.height);
|
||||
if(ippiCopy_32f_C1R((Ipp32f*)kernel.data, (int)kernel.step, kerTmp, kerStep, kernelSize) < 0)
|
||||
return false;
|
||||
pKerBuffer = kerTmp;
|
||||
}
|
||||
#else
|
||||
kerTmp.Alloc(kerStep*kernelSize.height);
|
||||
Mat kerFlip(Size(kernelSize.width, kernelSize.height), CV_32FC1, kerTmp, kerStep);
|
||||
flip(kernel, kerFlip, -1);
|
||||
pKerBuffer = kerTmp;
|
||||
#endif
|
||||
|
||||
ippsFree(buffer);
|
||||
ippsFree(spec);
|
||||
if((status = ippiFilterBorderInit_32f(pKerBuffer, kernelSize,
|
||||
dataType, cn, ippRndFinancial, spec)) >= 0 )
|
||||
{
|
||||
status = ippFunc(src.data, (int)src.step, dst.data, (int)dst.step, dstRoiSize,
|
||||
ippBorderType, borderValue, spec, buffer);
|
||||
}
|
||||
}
|
||||
else if(kdepth == CV_16S)
|
||||
{
|
||||
Ipp16s *pKerBuffer = (Ipp16s*)kernel.data;
|
||||
IppAutoBuffer<Ipp16s> kerTmp;
|
||||
int kerStep = sizeof(Ipp16s)*kernelSize.width;
|
||||
#if IPP_VERSION_X100 >= 900
|
||||
if(kernel.step != kerStep)
|
||||
{
|
||||
kerTmp.Alloc(kerStep*kernelSize.height);
|
||||
if(ippiCopy_16s_C1R((Ipp16s*)kernel.data, (int)kernel.step, kerTmp, kerStep, kernelSize) < 0)
|
||||
return false;
|
||||
pKerBuffer = kerTmp;
|
||||
}
|
||||
#else
|
||||
kerTmp.Alloc(kerStep*kernelSize.height);
|
||||
Mat kerFlip(Size(kernelSize.width, kernelSize.height), CV_16SC1, kerTmp, kerStep);
|
||||
flip(kernel, kerFlip, -1);
|
||||
pKerBuffer = kerTmp;
|
||||
#endif
|
||||
|
||||
if((status = ippiFilterBorderInit_16s(pKerBuffer, kernelSize,
|
||||
0, dataType, cn, ippRndFinancial, spec)) >= 0)
|
||||
{
|
||||
status = ippFunc(src.data, (int)src.step, dst.data, (int)dst.step, dstRoiSize,
|
||||
ippBorderType, borderValue, spec, buffer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (status >= 0)
|
||||
|
@ -1231,17 +1231,18 @@ static bool ipp_MorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kern
|
||||
}
|
||||
else
|
||||
{
|
||||
#if IPP_VERSION_X100 != 900 // Problems with accuracy in 9.0.0
|
||||
#if IPP_VERSION_X100 >= 900
|
||||
if (((kernel.cols - 1) / 2 != anchor.x) || ((kernel.rows - 1) / 2 != anchor.y)) // Arbitrary anchor is no longer supporeted since IPP 9.0.0
|
||||
if (((kernelSize.width - 1) / 2 != anchor.x) || ((kernelSize.height - 1) / 2 != anchor.y)) // Arbitrary anchor is no longer supporeted since IPP 9.0.0
|
||||
return false;
|
||||
|
||||
#define IPP_MORPH_CASE(cvtype, flavor, data_type) \
|
||||
#define IPP_MORPH_CASE(cvtype, flavor, data_type, cn) \
|
||||
case cvtype: \
|
||||
{\
|
||||
if (op == MORPH_ERODE)\
|
||||
{\
|
||||
int bufSize = 0;\
|
||||
if (0 > ippiFilterMinBorderGetBufferSize(roiSize, kernelSize, ipp##data_type, 1, &bufSize))\
|
||||
if (0 > ippiFilterMinBorderGetBufferSize(roiSize, kernelSize, ipp##data_type, cn, &bufSize))\
|
||||
return false;\
|
||||
AutoBuffer<uchar> buf(bufSize + 64);\
|
||||
uchar* buffer = alignPtr((uchar*)buf, 32);\
|
||||
@ -1250,7 +1251,7 @@ static bool ipp_MorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kern
|
||||
else\
|
||||
{\
|
||||
int bufSize = 0;\
|
||||
if (0 > ippiFilterMaxBorderGetBufferSize(roiSize, kernelSize, ipp##data_type, 1, &bufSize))\
|
||||
if (0 > ippiFilterMaxBorderGetBufferSize(roiSize, kernelSize, ipp##data_type, cn, &bufSize))\
|
||||
return false;\
|
||||
AutoBuffer<uchar> buf(bufSize + 64);\
|
||||
uchar* buffer = alignPtr((uchar*)buf, 32);\
|
||||
@ -1261,7 +1262,7 @@ static bool ipp_MorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kern
|
||||
#else
|
||||
IppiPoint point = {anchor.x, anchor.y};
|
||||
|
||||
#define IPP_MORPH_CASE(cvtype, flavor, data_type) \
|
||||
#define IPP_MORPH_CASE(cvtype, flavor, data_type, cn) \
|
||||
case cvtype: \
|
||||
{\
|
||||
int bufSize = 0;\
|
||||
@ -1279,17 +1280,18 @@ static bool ipp_MorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kern
|
||||
CV_SUPPRESS_DEPRECATED_START
|
||||
switch (type)
|
||||
{
|
||||
IPP_MORPH_CASE(CV_8UC1, 8u_C1R, 8u);
|
||||
IPP_MORPH_CASE(CV_8UC3, 8u_C3R, 8u);
|
||||
IPP_MORPH_CASE(CV_8UC4, 8u_C4R, 8u);
|
||||
IPP_MORPH_CASE(CV_32FC1, 32f_C1R, 32f);
|
||||
IPP_MORPH_CASE(CV_32FC3, 32f_C3R, 32f);
|
||||
IPP_MORPH_CASE(CV_32FC4, 32f_C4R, 32f);
|
||||
IPP_MORPH_CASE(CV_8UC1, 8u_C1R, 8u, 1);
|
||||
IPP_MORPH_CASE(CV_8UC3, 8u_C3R, 8u, 3);
|
||||
IPP_MORPH_CASE(CV_8UC4, 8u_C4R, 8u, 4);
|
||||
IPP_MORPH_CASE(CV_32FC1, 32f_C1R, 32f, 1);
|
||||
IPP_MORPH_CASE(CV_32FC3, 32f_C3R, 32f, 3);
|
||||
IPP_MORPH_CASE(CV_32FC4, 32f_C4R, 32f, 4);
|
||||
default:
|
||||
;
|
||||
}
|
||||
CV_SUPPRESS_DEPRECATED_END
|
||||
#undef IPP_MORPH_CASE
|
||||
#endif
|
||||
}
|
||||
#else
|
||||
CV_UNUSED(op); CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(kernel); CV_UNUSED(ksize); CV_UNUSED(anchor); CV_UNUSED(rectKernel);
|
||||
|
@ -1695,32 +1695,33 @@ static bool ipp_GaussianBlur( InputArray _src, OutputArray _dst, Size ksize,
|
||||
|
||||
if (ippiFilterGaussianGetBufferSize(roiSize, (Ipp32u)ksize.width, dataType, cn, &specSize, &bufferSize) >= 0)
|
||||
{
|
||||
IppFilterGaussianSpec * pSpec = (IppFilterGaussianSpec *)ippMalloc(specSize);
|
||||
Ipp8u * pBuffer = (Ipp8u*)ippMalloc(bufferSize);
|
||||
IppAutoBuffer<IppFilterGaussianSpec> spec(specSize);
|
||||
IppAutoBuffer<Ipp8u> buffer(bufferSize);
|
||||
|
||||
if (ippiFilterGaussianInit(roiSize, (Ipp32u)ksize.width, (Ipp32f)sigma1, ippBorder, dataType, 1, pSpec, pBuffer) >= 0)
|
||||
if (ippiFilterGaussianInit(roiSize, (Ipp32u)ksize.width, (Ipp32f)sigma1, ippBorder, dataType, cn, spec, buffer) >= 0)
|
||||
{
|
||||
#define IPP_FILTER_GAUSS_C1(ippfavor) \
|
||||
{ \
|
||||
typedef Ipp##ippfavor ippType; \
|
||||
ippType borderValues = 0; \
|
||||
status = ippiFilterGaussianBorder_##ippfavor##_C1R(src.ptr<ippType>(), (int)src.step, \
|
||||
dst.ptr<ippType>(), (int)dst.step, roiSize, borderValues, pSpec, pBuffer); \
|
||||
Ipp##ippfavor borderValues = 0; \
|
||||
status = ippiFilterGaussianBorder_##ippfavor##_C1R(src.ptr<Ipp##ippfavor>(), (int)src.step, \
|
||||
dst.ptr<Ipp##ippfavor>(), (int)dst.step, roiSize, borderValues, spec, buffer); \
|
||||
}
|
||||
|
||||
#define IPP_FILTER_GAUSS_CN(ippfavor, ippcn) \
|
||||
{ \
|
||||
typedef Ipp##ippfavor ippType; \
|
||||
ippType borderValues[] = { 0, 0, 0 }; \
|
||||
status = ippiFilterGaussianBorder_##ippfavor##_C##ippcn##R(src.ptr<ippType>(), (int)src.step, \
|
||||
dst.ptr<ippType>(), (int)dst.step, roiSize, borderValues, pSpec, pBuffer); \
|
||||
Ipp##ippfavor borderValues[] = { 0, 0, 0 }; \
|
||||
status = ippiFilterGaussianBorder_##ippfavor##_C##ippcn##R(src.ptr<Ipp##ippfavor>(), (int)src.step, \
|
||||
dst.ptr<Ipp##ippfavor>(), (int)dst.step, roiSize, borderValues, spec, buffer); \
|
||||
}
|
||||
|
||||
IppStatus status = ippStsErr;
|
||||
#if !HAVE_ICV
|
||||
#if IPP_VERSION_X100 > 901 // Buffer overflow in IPP
|
||||
if (type == CV_8UC1)
|
||||
IPP_FILTER_GAUSS_C1(8u)
|
||||
else if (type == CV_8UC3)
|
||||
else
|
||||
#endif
|
||||
if (type == CV_8UC3)
|
||||
IPP_FILTER_GAUSS_CN(8u, 3)
|
||||
else if (type == CV_16UC1)
|
||||
IPP_FILTER_GAUSS_C1(16u)
|
||||
@ -1737,11 +1738,6 @@ static bool ipp_GaussianBlur( InputArray _src, OutputArray _dst, Size ksize,
|
||||
if (type == CV_32FC1)
|
||||
IPP_FILTER_GAUSS_C1(32f)
|
||||
|
||||
if (pSpec)
|
||||
ippFree(pSpec);
|
||||
if (pBuffer)
|
||||
ippFree(pBuffer);
|
||||
|
||||
if(status >= 0)
|
||||
return true;
|
||||
|
||||
|
@ -425,7 +425,7 @@ namespace cv
|
||||
{
|
||||
static bool ipp_integral(InputArray _src, OutputArray _sum, OutputArray _sqsum, OutputArray _tilted, int sdepth, int sqdepth)
|
||||
{
|
||||
#if !defined(HAVE_IPP_ICV_ONLY) // Disabled on ICV due invalid results
|
||||
#if !defined(HAVE_IPP_ICV_ONLY) && (IPP_VERSION_X100 != 900) // Disabled on ICV due invalid results
|
||||
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
||||
if( sdepth <= 0 )
|
||||
sdepth = depth == CV_8U ? CV_32S : CV_64F;
|
||||
|
Loading…
Reference in New Issue
Block a user