Merge pull request #2568 from vbystricky:ippicv

This commit is contained in:
Andrey Pavlenko 2014-04-07 15:52:31 +04:00 committed by OpenCV Buildbot
commit f690440533
19 changed files with 666 additions and 523 deletions

View File

@ -22,5 +22,5 @@ PERF_TEST_P(Size_MatType, dft, TEST_MATS_DFT)
TEST_CYCLE() dft(src, dst); TEST_CYCLE() dft(src, dst);
SANITY_CHECK(dst, 1e-5); SANITY_CHECK(dst, 1e-5, ERROR_RELATIVE);
} }

View File

@ -65,8 +65,8 @@ PERF_TEST_P(Size_MatType, meanStdDev, TYPICAL_MATS)
TEST_CYCLE() meanStdDev(src, mean, dev); TEST_CYCLE() meanStdDev(src, mean, dev);
SANITY_CHECK(mean, 1e-6); SANITY_CHECK(mean, 1e-5, ERROR_RELATIVE);
SANITY_CHECK(dev, 1e-6); SANITY_CHECK(dev, 1e-5, ERROR_RELATIVE);
} }
PERF_TEST_P(Size_MatType, meanStdDev_mask, TYPICAL_MATS) PERF_TEST_P(Size_MatType, meanStdDev_mask, TYPICAL_MATS)

View File

@ -458,9 +458,12 @@ static void add8u( const uchar* src1, size_t step1,
const uchar* src2, size_t step2, const uchar* src2, size_t step2,
uchar* dst, size_t step, Size sz, void* ) uchar* dst, size_t step, Size sz, void* )
{ {
IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); #if (ARITHM_USE_IPP == 1)
ippiAdd_8u_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz, 0), fixSteps(sz, sizeof(dst[0]), step1, step2, step);
(vBinOp<uchar, OpAdd<uchar>, IF_SIMD(VAdd<uchar>)>(src1, step1, src2, step2, dst, step, sz))); if (0 <= ippiAdd_8u_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz, 0))
return;
#endif
(vBinOp<uchar, OpAdd<uchar>, IF_SIMD(VAdd<uchar>)>(src1, step1, src2, step2, dst, step, sz));
} }
static void add8s( const schar* src1, size_t step1, static void add8s( const schar* src1, size_t step1,
@ -474,18 +477,24 @@ static void add16u( const ushort* src1, size_t step1,
const ushort* src2, size_t step2, const ushort* src2, size_t step2,
ushort* dst, size_t step, Size sz, void* ) ushort* dst, size_t step, Size sz, void* )
{ {
IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); #if (ARITHM_USE_IPP == 1)
ippiAdd_16u_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz, 0), fixSteps(sz, sizeof(dst[0]), step1, step2, step);
(vBinOp<ushort, OpAdd<ushort>, IF_SIMD(VAdd<ushort>)>(src1, step1, src2, step2, dst, step, sz))); if (0 <= ippiAdd_16u_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz, 0))
return;
#endif
(vBinOp<ushort, OpAdd<ushort>, IF_SIMD(VAdd<ushort>)>(src1, step1, src2, step2, dst, step, sz));
} }
static void add16s( const short* src1, size_t step1, static void add16s( const short* src1, size_t step1,
const short* src2, size_t step2, const short* src2, size_t step2,
short* dst, size_t step, Size sz, void* ) short* dst, size_t step, Size sz, void* )
{ {
IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); #if (ARITHM_USE_IPP == 1)
ippiAdd_16s_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz, 0), fixSteps(sz, sizeof(dst[0]), step1, step2, step);
(vBinOp<short, OpAdd<short>, IF_SIMD(VAdd<short>)>(src1, step1, src2, step2, dst, step, sz))); if (0 <= ippiAdd_16s_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz, 0))
return;
#endif
(vBinOp<short, OpAdd<short>, IF_SIMD(VAdd<short>)>(src1, step1, src2, step2, dst, step, sz));
} }
static void add32s( const int* src1, size_t step1, static void add32s( const int* src1, size_t step1,
@ -499,9 +508,12 @@ static void add32f( const float* src1, size_t step1,
const float* src2, size_t step2, const float* src2, size_t step2,
float* dst, size_t step, Size sz, void* ) float* dst, size_t step, Size sz, void* )
{ {
IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); #if (ARITHM_USE_IPP == 1)
ippiAdd_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz), fixSteps(sz, sizeof(dst[0]), step1, step2, step);
(vBinOp32<float, OpAdd<float>, IF_SIMD(VAdd<float>)>(src1, step1, src2, step2, dst, step, sz))); if (0 <= ippiAdd_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz))
return;
#endif
(vBinOp32<float, OpAdd<float>, IF_SIMD(VAdd<float>)>(src1, step1, src2, step2, dst, step, sz));
} }
static void add64f( const double* src1, size_t step1, static void add64f( const double* src1, size_t step1,
@ -515,9 +527,12 @@ static void sub8u( const uchar* src1, size_t step1,
const uchar* src2, size_t step2, const uchar* src2, size_t step2,
uchar* dst, size_t step, Size sz, void* ) uchar* dst, size_t step, Size sz, void* )
{ {
IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); #if (ARITHM_USE_IPP == 1)
ippiSub_8u_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz, 0), fixSteps(sz, sizeof(dst[0]), step1, step2, step);
(vBinOp<uchar, OpSub<uchar>, IF_SIMD(VSub<uchar>)>(src1, step1, src2, step2, dst, step, sz))); if (0 <= ippiSub_8u_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz, 0))
return;
#endif
(vBinOp<uchar, OpSub<uchar>, IF_SIMD(VSub<uchar>)>(src1, step1, src2, step2, dst, step, sz));
} }
static void sub8s( const schar* src1, size_t step1, static void sub8s( const schar* src1, size_t step1,
@ -531,18 +546,24 @@ static void sub16u( const ushort* src1, size_t step1,
const ushort* src2, size_t step2, const ushort* src2, size_t step2,
ushort* dst, size_t step, Size sz, void* ) ushort* dst, size_t step, Size sz, void* )
{ {
IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); #if (ARITHM_USE_IPP == 1)
ippiSub_16u_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz, 0), fixSteps(sz, sizeof(dst[0]), step1, step2, step);
(vBinOp<ushort, OpSub<ushort>, IF_SIMD(VSub<ushort>)>(src1, step1, src2, step2, dst, step, sz))); if (0 <= ippiSub_16u_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz, 0))
return;
#endif
(vBinOp<ushort, OpSub<ushort>, IF_SIMD(VSub<ushort>)>(src1, step1, src2, step2, dst, step, sz));
} }
static void sub16s( const short* src1, size_t step1, static void sub16s( const short* src1, size_t step1,
const short* src2, size_t step2, const short* src2, size_t step2,
short* dst, size_t step, Size sz, void* ) short* dst, size_t step, Size sz, void* )
{ {
IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); #if (ARITHM_USE_IPP == 1)
ippiSub_16s_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz, 0), fixSteps(sz, sizeof(dst[0]), step1, step2, step);
(vBinOp<short, OpSub<short>, IF_SIMD(VSub<short>)>(src1, step1, src2, step2, dst, step, sz))); if (0 <= ippiSub_16s_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz, 0))
return;
#endif
(vBinOp<short, OpSub<short>, IF_SIMD(VSub<short>)>(src1, step1, src2, step2, dst, step, sz));
} }
static void sub32s( const int* src1, size_t step1, static void sub32s( const int* src1, size_t step1,
@ -556,9 +577,12 @@ static void sub32f( const float* src1, size_t step1,
const float* src2, size_t step2, const float* src2, size_t step2,
float* dst, size_t step, Size sz, void* ) float* dst, size_t step, Size sz, void* )
{ {
IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); #if (ARITHM_USE_IPP == 1)
ippiSub_32f_C1R(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz), fixSteps(sz, sizeof(dst[0]), step1, step2, step);
(vBinOp32<float, OpSub<float>, IF_SIMD(VSub<float>)>(src1, step1, src2, step2, dst, step, sz))); if (0 <= ippiSub_32f_C1R(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz))
return;
#endif
(vBinOp32<float, OpSub<float>, IF_SIMD(VSub<float>)>(src1, step1, src2, step2, dst, step, sz));
} }
static void sub64f( const double* src1, size_t step1, static void sub64f( const double* src1, size_t step1,
@ -576,26 +600,23 @@ static void max8u( const uchar* src1, size_t step1,
uchar* dst, size_t step, Size sz, void* ) uchar* dst, size_t step, Size sz, void* )
{ {
#if (ARITHM_USE_IPP == 1) #if (ARITHM_USE_IPP == 1)
{
uchar* s1 = (uchar*)src1; uchar* s1 = (uchar*)src1;
uchar* s2 = (uchar*)src2; uchar* s2 = (uchar*)src2;
uchar* d = dst; uchar* d = dst;
fixSteps(sz, sizeof(dst[0]), step1, step2, step); fixSteps(sz, sizeof(dst[0]), step1, step2, step);
for(int i = 0; i < sz.height; i++) int i = 0;
for(; i < sz.height; i++)
{ {
ippsMaxEvery_8u(s1, s2, d, sz.width); if (0 > ippsMaxEvery_8u(s1, s2, d, sz.width))
break;
s1 += step1; s1 += step1;
s2 += step2; s2 += step2;
d += step; d += step;
} }
} if (i == sz.height)
#else return;
vBinOp<uchar, OpMax<uchar>, IF_SIMD(VMax<uchar>)>(src1, step1, src2, step2, dst, step, sz);
#endif #endif
vBinOp<uchar, OpMax<uchar>, IF_SIMD(VMax<uchar>)>(src1, step1, src2, step2, dst, step, sz);
// IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
// ippiMaxEvery_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
// (vBinOp8<uchar, OpMax<uchar>, IF_SIMD(_VMax8u)>(src1, step1, src2, step2, dst, step, sz)));
} }
static void max8s( const schar* src1, size_t step1, static void max8s( const schar* src1, size_t step1,
@ -610,26 +631,23 @@ static void max16u( const ushort* src1, size_t step1,
ushort* dst, size_t step, Size sz, void* ) ushort* dst, size_t step, Size sz, void* )
{ {
#if (ARITHM_USE_IPP == 1) #if (ARITHM_USE_IPP == 1)
{
ushort* s1 = (ushort*)src1; ushort* s1 = (ushort*)src1;
ushort* s2 = (ushort*)src2; ushort* s2 = (ushort*)src2;
ushort* d = dst; ushort* d = dst;
fixSteps(sz, sizeof(dst[0]), step1, step2, step); fixSteps(sz, sizeof(dst[0]), step1, step2, step);
for(int i = 0; i < sz.height; i++) int i = 0;
for(; i < sz.height; i++)
{ {
ippsMaxEvery_16u(s1, s2, d, sz.width); if (0 > ippsMaxEvery_16u(s1, s2, d, sz.width))
break;
s1 = (ushort*)((uchar*)s1 + step1); s1 = (ushort*)((uchar*)s1 + step1);
s2 = (ushort*)((uchar*)s2 + step2); s2 = (ushort*)((uchar*)s2 + step2);
d = (ushort*)((uchar*)d + step); d = (ushort*)((uchar*)d + step);
} }
} if (i == sz.height)
#else return;
vBinOp<ushort, OpMax<ushort>, IF_SIMD(VMax<ushort>)>(src1, step1, src2, step2, dst, step, sz);
#endif #endif
vBinOp<ushort, OpMax<ushort>, IF_SIMD(VMax<ushort>)>(src1, step1, src2, step2, dst, step, sz);
// IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
// ippiMaxEvery_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
// (vBinOp16<ushort, OpMax<ushort>, IF_SIMD(_VMax16u)>(src1, step1, src2, step2, dst, step, sz)));
} }
static void max16s( const short* src1, size_t step1, static void max16s( const short* src1, size_t step1,
@ -651,25 +669,23 @@ static void max32f( const float* src1, size_t step1,
float* dst, size_t step, Size sz, void* ) float* dst, size_t step, Size sz, void* )
{ {
#if (ARITHM_USE_IPP == 1) #if (ARITHM_USE_IPP == 1)
{
float* s1 = (float*)src1; float* s1 = (float*)src1;
float* s2 = (float*)src2; float* s2 = (float*)src2;
float* d = dst; float* d = dst;
fixSteps(sz, sizeof(dst[0]), step1, step2, step); fixSteps(sz, sizeof(dst[0]), step1, step2, step);
for(int i = 0; i < sz.height; i++) int i = 0;
for(; i < sz.height; i++)
{ {
ippsMaxEvery_32f(s1, s2, d, sz.width); if (0 > ippsMaxEvery_32f(s1, s2, d, sz.width))
break;
s1 = (float*)((uchar*)s1 + step1); s1 = (float*)((uchar*)s1 + step1);
s2 = (float*)((uchar*)s2 + step2); s2 = (float*)((uchar*)s2 + step2);
d = (float*)((uchar*)d + step); d = (float*)((uchar*)d + step);
} }
} if (i == sz.height)
#else return;
vBinOp32<float, OpMax<float>, IF_SIMD(VMax<float>)>(src1, step1, src2, step2, dst, step, sz);
#endif #endif
// IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); vBinOp32<float, OpMax<float>, IF_SIMD(VMax<float>)>(src1, step1, src2, step2, dst, step, sz);
// ippiMaxEvery_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
// (vBinOp32f<OpMax<float>, IF_SIMD(_VMax32f)>(src1, step1, src2, step2, dst, step, sz)));
} }
static void max64f( const double* src1, size_t step1, static void max64f( const double* src1, size_t step1,
@ -684,26 +700,23 @@ static void min8u( const uchar* src1, size_t step1,
uchar* dst, size_t step, Size sz, void* ) uchar* dst, size_t step, Size sz, void* )
{ {
#if (ARITHM_USE_IPP == 1) #if (ARITHM_USE_IPP == 1)
{
uchar* s1 = (uchar*)src1; uchar* s1 = (uchar*)src1;
uchar* s2 = (uchar*)src2; uchar* s2 = (uchar*)src2;
uchar* d = dst; uchar* d = dst;
fixSteps(sz, sizeof(dst[0]), step1, step2, step); fixSteps(sz, sizeof(dst[0]), step1, step2, step);
for(int i = 0; i < sz.height; i++) int i = 0;
for(; i < sz.height; i++)
{ {
ippsMinEvery_8u(s1, s2, d, sz.width); if (0 > ippsMinEvery_8u(s1, s2, d, sz.width))
break;
s1 += step1; s1 += step1;
s2 += step2; s2 += step2;
d += step; d += step;
} }
} if (i == sz.height)
#else return;
vBinOp<uchar, OpMin<uchar>, IF_SIMD(VMin<uchar>)>(src1, step1, src2, step2, dst, step, sz);
#endif #endif
vBinOp<uchar, OpMin<uchar>, IF_SIMD(VMin<uchar>)>(src1, step1, src2, step2, dst, step, sz);
// IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
// ippiMinEvery_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
// (vBinOp8<uchar, OpMin<uchar>, IF_SIMD(_VMin8u)>(src1, step1, src2, step2, dst, step, sz)));
} }
static void min8s( const schar* src1, size_t step1, static void min8s( const schar* src1, size_t step1,
@ -718,26 +731,23 @@ static void min16u( const ushort* src1, size_t step1,
ushort* dst, size_t step, Size sz, void* ) ushort* dst, size_t step, Size sz, void* )
{ {
#if (ARITHM_USE_IPP == 1) #if (ARITHM_USE_IPP == 1)
{
ushort* s1 = (ushort*)src1; ushort* s1 = (ushort*)src1;
ushort* s2 = (ushort*)src2; ushort* s2 = (ushort*)src2;
ushort* d = dst; ushort* d = dst;
fixSteps(sz, sizeof(dst[0]), step1, step2, step); fixSteps(sz, sizeof(dst[0]), step1, step2, step);
for(int i = 0; i < sz.height; i++) int i = 0;
for(; i < sz.height; i++)
{ {
ippsMinEvery_16u(s1, s2, d, sz.width); if (0 > ippsMinEvery_16u(s1, s2, d, sz.width))
break;
s1 = (ushort*)((uchar*)s1 + step1); s1 = (ushort*)((uchar*)s1 + step1);
s2 = (ushort*)((uchar*)s2 + step2); s2 = (ushort*)((uchar*)s2 + step2);
d = (ushort*)((uchar*)d + step); d = (ushort*)((uchar*)d + step);
} }
} if (i == sz.height)
#else return;
vBinOp<ushort, OpMin<ushort>, IF_SIMD(VMin<ushort>)>(src1, step1, src2, step2, dst, step, sz);
#endif #endif
vBinOp<ushort, OpMin<ushort>, IF_SIMD(VMin<ushort>)>(src1, step1, src2, step2, dst, step, sz);
// IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
// ippiMinEvery_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
// (vBinOp16<ushort, OpMin<ushort>, IF_SIMD(_VMin16u)>(src1, step1, src2, step2, dst, step, sz)));
} }
static void min16s( const short* src1, size_t step1, static void min16s( const short* src1, size_t step1,
@ -759,25 +769,23 @@ static void min32f( const float* src1, size_t step1,
float* dst, size_t step, Size sz, void* ) float* dst, size_t step, Size sz, void* )
{ {
#if (ARITHM_USE_IPP == 1) #if (ARITHM_USE_IPP == 1)
{
float* s1 = (float*)src1; float* s1 = (float*)src1;
float* s2 = (float*)src2; float* s2 = (float*)src2;
float* d = dst; float* d = dst;
fixSteps(sz, sizeof(dst[0]), step1, step2, step); fixSteps(sz, sizeof(dst[0]), step1, step2, step);
for(int i = 0; i < sz.height; i++) int i = 0;
for(; i < sz.height; i++)
{ {
ippsMinEvery_32f(s1, s2, d, sz.width); if (0 > ippsMinEvery_32f(s1, s2, d, sz.width))
break;
s1 = (float*)((uchar*)s1 + step1); s1 = (float*)((uchar*)s1 + step1);
s2 = (float*)((uchar*)s2 + step2); s2 = (float*)((uchar*)s2 + step2);
d = (float*)((uchar*)d + step); d = (float*)((uchar*)d + step);
} }
} if (i == sz.height)
#else return;
vBinOp32<float, OpMin<float>, IF_SIMD(VMin<float>)>(src1, step1, src2, step2, dst, step, sz);
#endif #endif
// IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); vBinOp32<float, OpMin<float>, IF_SIMD(VMin<float>)>(src1, step1, src2, step2, dst, step, sz);
// ippiMinEvery_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
// (vBinOp32f<OpMin<float>, IF_SIMD(_VMin32f)>(src1, step1, src2, step2, dst, step, sz)));
} }
static void min64f( const double* src1, size_t step1, static void min64f( const double* src1, size_t step1,
@ -791,9 +799,12 @@ static void absdiff8u( const uchar* src1, size_t step1,
const uchar* src2, size_t step2, const uchar* src2, size_t step2,
uchar* dst, size_t step, Size sz, void* ) uchar* dst, size_t step, Size sz, void* )
{ {
IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); #if (ARITHM_USE_IPP == 1)
ippiAbsDiff_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz), fixSteps(sz, sizeof(dst[0]), step1, step2, step);
(vBinOp<uchar, OpAbsDiff<uchar>, IF_SIMD(VAbsDiff<uchar>)>(src1, step1, src2, step2, dst, step, sz))); if (0 <= ippiAbsDiff_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz))
return;
#endif
(vBinOp<uchar, OpAbsDiff<uchar>, IF_SIMD(VAbsDiff<uchar>)>(src1, step1, src2, step2, dst, step, sz));
} }
static void absdiff8s( const schar* src1, size_t step1, static void absdiff8s( const schar* src1, size_t step1,
@ -807,9 +818,12 @@ static void absdiff16u( const ushort* src1, size_t step1,
const ushort* src2, size_t step2, const ushort* src2, size_t step2,
ushort* dst, size_t step, Size sz, void* ) ushort* dst, size_t step, Size sz, void* )
{ {
IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); #if (ARITHM_USE_IPP == 1)
ippiAbsDiff_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz), fixSteps(sz, sizeof(dst[0]), step1, step2, step);
(vBinOp<ushort, OpAbsDiff<ushort>, IF_SIMD(VAbsDiff<ushort>)>(src1, step1, src2, step2, dst, step, sz))); if (0 <= ippiAbsDiff_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz))
return;
#endif
(vBinOp<ushort, OpAbsDiff<ushort>, IF_SIMD(VAbsDiff<ushort>)>(src1, step1, src2, step2, dst, step, sz));
} }
static void absdiff16s( const short* src1, size_t step1, static void absdiff16s( const short* src1, size_t step1,
@ -830,9 +844,12 @@ static void absdiff32f( const float* src1, size_t step1,
const float* src2, size_t step2, const float* src2, size_t step2,
float* dst, size_t step, Size sz, void* ) float* dst, size_t step, Size sz, void* )
{ {
IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); #if (ARITHM_USE_IPP == 1)
ippiAbsDiff_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz), fixSteps(sz, sizeof(dst[0]), step1, step2, step);
(vBinOp32<float, OpAbsDiff<float>, IF_SIMD(VAbsDiff<float>)>(src1, step1, src2, step2, dst, step, sz))); if (0 <= ippiAbsDiff_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz))
return;
#endif
(vBinOp32<float, OpAbsDiff<float>, IF_SIMD(VAbsDiff<float>)>(src1, step1, src2, step2, dst, step, sz));
} }
static void absdiff64f( const double* src1, size_t step1, static void absdiff64f( const double* src1, size_t step1,
@ -847,36 +864,48 @@ static void and8u( const uchar* src1, size_t step1,
const uchar* src2, size_t step2, const uchar* src2, size_t step2,
uchar* dst, size_t step, Size sz, void* ) uchar* dst, size_t step, Size sz, void* )
{ {
IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); #if (ARITHM_USE_IPP == 1)
ippiAnd_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz), fixSteps(sz, sizeof(dst[0]), step1, step2, step);
(vBinOp<uchar, OpAnd<uchar>, IF_SIMD(VAnd<uchar>)>(src1, step1, src2, step2, dst, step, sz))); if (0 <= ippiAnd_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz))
return;
#endif
(vBinOp<uchar, OpAnd<uchar>, IF_SIMD(VAnd<uchar>)>(src1, step1, src2, step2, dst, step, sz));
} }
static void or8u( const uchar* src1, size_t step1, static void or8u( const uchar* src1, size_t step1,
const uchar* src2, size_t step2, const uchar* src2, size_t step2,
uchar* dst, size_t step, Size sz, void* ) uchar* dst, size_t step, Size sz, void* )
{ {
IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); #if (ARITHM_USE_IPP == 1)
ippiOr_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz), fixSteps(sz, sizeof(dst[0]), step1, step2, step);
(vBinOp<uchar, OpOr<uchar>, IF_SIMD(VOr<uchar>)>(src1, step1, src2, step2, dst, step, sz))); if (0 <= ippiOr_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz))
return;
#endif
(vBinOp<uchar, OpOr<uchar>, IF_SIMD(VOr<uchar>)>(src1, step1, src2, step2, dst, step, sz));
} }
static void xor8u( const uchar* src1, size_t step1, static void xor8u( const uchar* src1, size_t step1,
const uchar* src2, size_t step2, const uchar* src2, size_t step2,
uchar* dst, size_t step, Size sz, void* ) uchar* dst, size_t step, Size sz, void* )
{ {
IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); #if (ARITHM_USE_IPP == 1)
ippiXor_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz), fixSteps(sz, sizeof(dst[0]), step1, step2, step);
(vBinOp<uchar, OpXor<uchar>, IF_SIMD(VXor<uchar>)>(src1, step1, src2, step2, dst, step, sz))); if (0 <= ippiXor_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz))
return;
#endif
(vBinOp<uchar, OpXor<uchar>, IF_SIMD(VXor<uchar>)>(src1, step1, src2, step2, dst, step, sz));
} }
static void not8u( const uchar* src1, size_t step1, static void not8u( const uchar* src1, size_t step1,
const uchar* src2, size_t step2, const uchar* src2, size_t step2,
uchar* dst, size_t step, Size sz, void* ) uchar* dst, size_t step, Size sz, void* )
{ {
IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); (void *)src2; #if (ARITHM_USE_IPP == 1)
ippiNot_8u_C1R(src1, (int)step1, dst, (int)step, (IppiSize&)sz), fixSteps(sz, sizeof(dst[0]), step1, step2, step); (void *)src2;
(vBinOp<uchar, OpNot<uchar>, IF_SIMD(VNot<uchar>)>(src1, step1, src2, step2, dst, step, sz))); if (0 <= ippiNot_8u_C1R(src1, (int)step1, dst, (int)step, (IppiSize&)sz))
return;
#endif
(vBinOp<uchar, OpNot<uchar>, IF_SIMD(VNot<uchar>)>(src1, step1, src2, step2, dst, step, sz));
} }
/****************************************************************************************\ /****************************************************************************************\
@ -2357,7 +2386,7 @@ static void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t ste
if( op >= 0 ) if( op >= 0 )
{ {
fixSteps(size, sizeof(dst[0]), step1, step2, step); fixSteps(size, sizeof(dst[0]), step1, step2, step);
if( ippiCompare_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op) >= 0 ) if (0 <= ippiCompare_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op))
return; return;
} }
#endif #endif
@ -2440,7 +2469,7 @@ static void cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t
if( op >= 0 ) if( op >= 0 )
{ {
fixSteps(size, sizeof(dst[0]), step1, step2, step); fixSteps(size, sizeof(dst[0]), step1, step2, step);
if( ippiCompare_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op) >= 0 ) if (0 <= ippiCompare_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op))
return; return;
} }
#endif #endif
@ -2455,7 +2484,7 @@ static void cmp16s(const short* src1, size_t step1, const short* src2, size_t st
if( op > 0 ) if( op > 0 )
{ {
fixSteps(size, sizeof(dst[0]), step1, step2, step); fixSteps(size, sizeof(dst[0]), step1, step2, step);
if( ippiCompare_16s_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op) >= 0 ) if (0 <= ippiCompare_16s_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op))
return; return;
} }
#endif #endif
@ -2561,7 +2590,7 @@ static void cmp32f(const float* src1, size_t step1, const float* src2, size_t st
if( op >= 0 ) if( op >= 0 )
{ {
fixSteps(size, sizeof(dst[0]), step1, step2, step); fixSteps(size, sizeof(dst[0]), step1, step2, step);
if( ippiCompare_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op) >= 0 ) if (0 <= ippiCompare_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op))
return; return;
} }
#endif #endif

View File

@ -812,8 +812,6 @@ typedef union
} }
DBLINT; DBLINT;
#ifndef HAVE_IPP
#define EXPTAB_SCALE 6 #define EXPTAB_SCALE 6
#define EXPTAB_MASK ((1 << EXPTAB_SCALE) - 1) #define EXPTAB_MASK ((1 << EXPTAB_SCALE) - 1)
@ -1275,13 +1273,26 @@ static void Exp_64f( const double *_x, double *y, int n )
#undef EXPTAB_MASK #undef EXPTAB_MASK
#undef EXPPOLY_32F_A0 #undef EXPPOLY_32F_A0
#else #ifdef HAVE_IPP
static void Exp_32f_ipp(const float *x, float *y, int n)
{
if (0 <= ippsExp_32f_A21(x, y, n))
return;
Exp_32f(x, y, n);
}
#define Exp_32f ippsExp_32f_A21 static void Exp_64f_ipp(const double *x, double *y, int n)
#define Exp_64f ippsExp_64f_A50 {
if (0 <= ippsExp_64f_A50(x, y, n))
return;
Exp_64f(x, y, n);
}
#define Exp_32f Exp_32f_ipp
#define Exp_64f Exp_64f_ipp
#endif #endif
void exp( InputArray _src, OutputArray _dst ) void exp( InputArray _src, OutputArray _dst )
{ {
int type = _src.type(), depth = _src.depth(), cn = _src.channels(); int type = _src.type(), depth = _src.depth(), cn = _src.channels();
@ -1313,8 +1324,6 @@ void exp( InputArray _src, OutputArray _dst )
* L O G * * L O G *
\****************************************************************************************/ \****************************************************************************************/
#ifndef HAVE_IPP
#define LOGTAB_SCALE 8 #define LOGTAB_SCALE 8
#define LOGTAB_MASK ((1 << LOGTAB_SCALE) - 1) #define LOGTAB_MASK ((1 << LOGTAB_SCALE) - 1)
#define LOGTAB_MASK2 ((1 << (20 - LOGTAB_SCALE)) - 1) #define LOGTAB_MASK2 ((1 << (20 - LOGTAB_SCALE)) - 1)
@ -1922,11 +1931,23 @@ static void Log_64f( const double *x, double *y, int n )
} }
} }
#else #ifdef HAVE_IPP
static void Log_32f_ipp(const float *x, float *y, int n)
{
if (0 <= ippsLn_32f_A21(x, y, n))
return;
Log_32f(x, y, n);
}
#define Log_32f ippsLn_32f_A21 static void Log_64f_ipp(const double *x, double *y, int n)
#define Log_64f ippsLn_64f_A50 {
if (0 <= ippsLn_64f_A50(x, y, n))
return;
Log_64f(x, y, n);
}
#define Log_32f Log_32f_ipp
#define Log_64f Log_64f_ipp
#endif #endif
void log( InputArray _src, OutputArray _dst ) void log( InputArray _src, OutputArray _dst )

View File

@ -44,10 +44,6 @@
#include "opencl_kernels.hpp" #include "opencl_kernels.hpp"
#include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp" #include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp"
#ifdef HAVE_IPP
#include "ippversion.h"
#endif
namespace cv namespace cv
{ {
@ -2803,11 +2799,11 @@ static double dotProd_8u(const uchar* src1, const uchar* src2, int len)
{ {
double r = 0; double r = 0;
#if ARITHM_USE_IPP #if ARITHM_USE_IPP
ippiDotProd_8u64f_C1R(src1, (int)(len*sizeof(src1[0])), if (0 <= ippiDotProd_8u64f_C1R(src1, (int)(len*sizeof(src1[0])),
src2, (int)(len*sizeof(src2[0])), src2, (int)(len*sizeof(src2[0])),
ippiSize(len, 1), &r); ippiSize(len, 1), &r))
return r; return r;
#else #endif
int i = 0; int i = 0;
#if CV_SSE2 #if CV_SSE2
@ -2853,7 +2849,6 @@ static double dotProd_8u(const uchar* src1, const uchar* src2, int len)
} }
#endif #endif
return r + dotProd_(src1, src2, len - i); return r + dotProd_(src1, src2, len - i);
#endif
} }
@ -2864,48 +2859,52 @@ static double dotProd_8s(const schar* src1, const schar* src2, int len)
static double dotProd_16u(const ushort* src1, const ushort* src2, int len) static double dotProd_16u(const ushort* src1, const ushort* src2, int len)
{ {
#if (ARITHM_USE_IPP == 1)
double r = 0; double r = 0;
IF_IPP(ippiDotProd_16u64f_C1R(src1, (int)(len*sizeof(src1[0])), if (0 <= ippiDotProd_16u64f_C1R(src1, (int)(len*sizeof(src1[0])), src2, (int)(len*sizeof(src2[0])), ippiSize(len, 1), &r))
src2, (int)(len*sizeof(src2[0])),
ippiSize(len, 1), &r),
r = dotProd_(src1, src2, len));
return r; return r;
#endif
return dotProd_(src1, src2, len);
} }
static double dotProd_16s(const short* src1, const short* src2, int len) static double dotProd_16s(const short* src1, const short* src2, int len)
{ {
#if (ARITHM_USE_IPP == 1)
double r = 0; double r = 0;
IF_IPP(ippiDotProd_16s64f_C1R(src1, (int)(len*sizeof(src1[0])), if (0 <= ippiDotProd_16s64f_C1R(src1, (int)(len*sizeof(src1[0])), src2, (int)(len*sizeof(src2[0])), ippiSize(len, 1), &r))
src2, (int)(len*sizeof(src2[0])),
ippiSize(len, 1), &r),
r = dotProd_(src1, src2, len));
return r; return r;
#endif
return dotProd_(src1, src2, len);
} }
static double dotProd_32s(const int* src1, const int* src2, int len) static double dotProd_32s(const int* src1, const int* src2, int len)
{ {
#if (ARITHM_USE_IPP == 1)
double r = 0; double r = 0;
IF_IPP(ippiDotProd_32s64f_C1R(src1, (int)(len*sizeof(src1[0])), if (0 <= ippiDotProd_32s64f_C1R(src1, (int)(len*sizeof(src1[0])), src2, (int)(len*sizeof(src2[0])), ippiSize(len, 1), &r))
src2, (int)(len*sizeof(src2[0])),
ippiSize(len, 1), &r),
r = dotProd_(src1, src2, len));
return r; return r;
#endif
return dotProd_(src1, src2, len);
} }
static double dotProd_32f(const float* src1, const float* src2, int len) static double dotProd_32f(const float* src1, const float* src2, int len)
{ {
#if (ARITHM_USE_IPP == 1)
double r = 0; double r = 0;
IF_IPP(ippsDotProd_32f64f(src1, src2, len, &r), if (0 <= ippsDotProd_32f64f(src1, src2, len, &r))
r = dotProd_(src1, src2, len));
return r; return r;
#endif
return dotProd_(src1, src2, len);
} }
static double dotProd_64f(const double* src1, const double* src2, int len) static double dotProd_64f(const double* src1, const double* src2, int len)
{ {
#if (ARITHM_USE_IPP == 1)
double r = 0; double r = 0;
IF_IPP(ippsDotProd_64f(src1, src2, len, &r), if (0 <= ippsDotProd_64f(src1, src2, len, &r))
r = dotProd_(src1, src2, len));
return r; return r;
#endif
return dotProd_(src1, src2, len);
} }

View File

@ -199,10 +199,8 @@ enum { BLOCK_SIZE = 1024 };
#if defined HAVE_IPP && (IPP_VERSION_MAJOR >= 7) #if defined HAVE_IPP && (IPP_VERSION_MAJOR >= 7)
#define ARITHM_USE_IPP 1 #define ARITHM_USE_IPP 1
#define IF_IPP(then_call, else_call) then_call
#else #else
#define ARITHM_USE_IPP 0 #define ARITHM_USE_IPP 0
#define IF_IPP(then_call, else_call) else_call
#endif #endif
inline bool checkScalar(const Mat& sc, int atype, int sckind, int akind) inline bool checkScalar(const Mat& sc, int atype, int sckind, int akind)

View File

@ -43,7 +43,6 @@
#include "precomp.hpp" #include "precomp.hpp"
#include "opencl_kernels.hpp" #include "opencl_kernels.hpp"
#include <climits> #include <climits>
#include <limits>
namespace cv namespace cv
{ {
@ -972,7 +971,9 @@ void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, Input
ippiMeanStdDevFuncC1 ippFuncC1 = ippiMeanStdDevFuncC1 ippFuncC1 =
type == CV_8UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_8u_C1R : type == CV_8UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_8u_C1R :
type == CV_16UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_16u_C1R : type == CV_16UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_16u_C1R :
//type == CV_32FC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_32f_C1R ://Aug 2013: bug in IPP 7.1, 8.0 #if (IPP_VERSION_MAJOR >= 8) && (IPP_VERSION_MINOR >= 1)
type == CV_32FC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_32f_C1R ://Aug 2013: bug in IPP 7.1, 8.0
#endif
0; 0;
if( ippFuncC1 ) if( ippFuncC1 )
{ {
@ -2111,8 +2112,10 @@ double cv::norm( InputArray _src, int normType, InputArray _mask )
type == CV_16UC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_16u_C3R : type == CV_16UC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_16u_C3R :
type == CV_16UC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_16u_C4R : type == CV_16UC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_16u_C4R :
type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C1R : type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C1R :
//type == CV_16SC3 ? (ippiNormFunc)ippiNorm_Inf_16s_C3R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768 #if (IPP_VERSION_MAJOR >= 8) && (IPP_VERSION_MINOR >= 1)
//type == CV_16SC4 ? (ippiNormFunc)ippiNorm_Inf_16s_C4R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768 type == CV_16SC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C3R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
type == CV_16SC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C4R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
#endif
type == CV_32FC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C1R : type == CV_32FC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C1R :
type == CV_32FC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C3R : type == CV_32FC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C3R :
type == CV_32FC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C4R : type == CV_32FC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C4R :
@ -2541,8 +2544,10 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m
type == CV_16UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C3R : type == CV_16UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C3R :
type == CV_16UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C4R : type == CV_16UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C4R :
type == CV_16SC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C1R : type == CV_16SC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C1R :
//type == CV_16SC3 ? (ippiNormDiffFunc)ippiNormDiff_Inf_16s_C3R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768 #if (IPP_VERSION_MAJOR >= 8) && (IPP_VERSION_MINOR >= 1)
//type == CV_16SC4 ? (ippiNormDiffFunc)ippiNormDiff_Inf_16s_C4R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768 type == CV_16SC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C3R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
type == CV_16SC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C4R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
#endif
type == CV_32FC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C1R : type == CV_32FC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C1R :
type == CV_32FC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C3R : type == CV_32FC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C3R :
type == CV_32FC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C4R : type == CV_32FC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C4R :

View File

@ -397,17 +397,17 @@ int64 getCPUTickCount(void)
#else #else
#ifdef HAVE_IPP //#ifdef HAVE_IPP
int64 getCPUTickCount(void) //int64 getCPUTickCount(void)
{ //{
return ippGetCpuClocks(); // return ippGetCpuClocks();
} //}
#else //#else
int64 getCPUTickCount(void) int64 getCPUTickCount(void)
{ {
return getTickCount(); return getTickCount();
} }
#endif //#endif
#endif #endif

View File

@ -1,4 +1,4 @@
Feature Detection Feature Detection
================= =================
.. highlight:: cpp .. highlight:: cpp
@ -15,9 +15,9 @@ Finds edges in an image using the [Canny86]_ algorithm.
.. ocv:cfunction:: void cvCanny( const CvArr* image, CvArr* edges, double threshold1, double threshold2, int aperture_size=3 ) .. ocv:cfunction:: void cvCanny( const CvArr* image, CvArr* edges, double threshold1, double threshold2, int aperture_size=3 )
:param image: single-channel 8-bit input image. :param image: 8-bit input image.
:param edges: output edge map; it has the same size and type as ``image`` . :param edges: output edge map; single-channel 8-bit image, which has the same size as ``image`` .
:param threshold1: first threshold for the hysteresis procedure. :param threshold1: first threshold for the hysteresis procedure.

View File

@ -34,5 +34,5 @@ PERF_TEST_P( TestBilateralFilter, BilateralFilter,
TEST_CYCLE() bilateralFilter(src, dst, d, sigmaColor, sigmaSpace, BORDER_DEFAULT); TEST_CYCLE() bilateralFilter(src, dst, d, sigmaColor, sigmaSpace, BORDER_DEFAULT);
SANITY_CHECK(dst); SANITY_CHECK(dst, .01, ERROR_RELATIVE);
} }

View File

@ -42,13 +42,13 @@
#include "precomp.hpp" #include "precomp.hpp"
#include "opencl_kernels.hpp" #include "opencl_kernels.hpp"
/*
#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
#define USE_IPP_CANNY 1 #define USE_IPP_CANNY 1
#else #else
#undef USE_IPP_CANNY #undef USE_IPP_CANNY
#endif #endif
*/
namespace cv namespace cv
{ {
@ -286,7 +286,7 @@ void cv::Canny( InputArray _src, OutputArray _dst,
#endif #endif
#ifdef USE_IPP_CANNY #ifdef USE_IPP_CANNY
if( aperture_size == 3 && !L2gradient && if( aperture_size == 3 && !L2gradient && 1 == cn &&
ippCanny(src, dst, (float)low_thresh, (float)high_thresh) ) ippCanny(src, dst, (float)low_thresh, (float)high_thresh) )
return; return;
#endif #endif

View File

@ -252,6 +252,7 @@ bool CvtColorIPPLoopCopy(Mat& src, Mat& dst, const Cvt& cvt)
} }
bool ok; bool ok;
parallel_for_(Range(0, source.rows), CvtColorIPPLoop_Invoker<Cvt>(source, dst, cvt, &ok), source.total()/(double)(1<<16) ); parallel_for_(Range(0, source.rows), CvtColorIPPLoop_Invoker<Cvt>(source, dst, cvt, &ok), source.total()/(double)(1<<16) );
//ok = cvt(src.ptr<uchar>(0), (int)src.step[0], dst.ptr<uchar>(0), (int)dst.step[0], src.cols, src.rows);
return ok; return ok;
} }
@ -297,11 +298,13 @@ static ippiReorderFunc ippiSwapChannelsC3RTab[] =
0, (ippiReorderFunc)ippiSwapChannels_32f_C3R, 0, 0 0, (ippiReorderFunc)ippiSwapChannels_32f_C3R, 0, 0
}; };
#if (IPP_VERSION_MAJOR >= 8) && (IPP_VERSION_MINOR >= 1)
static ippiReorderFunc ippiSwapChannelsC4RTab[] = static ippiReorderFunc ippiSwapChannelsC4RTab[] =
{ {
(ippiReorderFunc)ippiSwapChannels_8u_AC4R, 0, (ippiReorderFunc)ippiSwapChannels_16u_AC4R, 0, (ippiReorderFunc)ippiSwapChannels_8u_C4R, 0, (ippiReorderFunc)ippiSwapChannels_16u_C4R, 0,
0, (ippiReorderFunc)ippiSwapChannels_32f_AC4R, 0, 0 0, (ippiReorderFunc)ippiSwapChannels_32f_C4R, 0, 0
}; };
#endif
static ippiColor2GrayFunc ippiColor2GrayC3Tab[] = static ippiColor2GrayFunc ippiColor2GrayC3Tab[] =
{ {
@ -3251,11 +3254,13 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
if( CvtColorIPPLoopCopy(src, dst, IPPReorderFunctor(ippiSwapChannelsC3RTab[depth], 2, 1, 0)) ) if( CvtColorIPPLoopCopy(src, dst, IPPReorderFunctor(ippiSwapChannelsC3RTab[depth], 2, 1, 0)) )
return; return;
} }
#if (IPP_VERSION_MAJOR >= 8) && (IPP_VERSION_MINOR >= 1)
else if( code == CV_RGBA2BGRA ) else if( code == CV_RGBA2BGRA )
{ {
if( CvtColorIPPLoopCopy(src, dst, IPPReorderFunctor(ippiSwapChannelsC4RTab[depth], 2, 1, 0)) ) if( CvtColorIPPLoopCopy(src, dst, IPPReorderFunctor(ippiSwapChannelsC4RTab[depth], 2, 1, 0)) )
return; return;
} }
#endif
#endif #endif
if( depth == CV_8U ) if( depth == CV_8U )
@ -3310,14 +3315,17 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
CV_Assert( scn == 3 || scn == 4 ); CV_Assert( scn == 3 || scn == 4 );
_dst.create(sz, CV_MAKETYPE(depth, 1)); _dst.create(sz, CV_MAKETYPE(depth, 1));
dst = _dst.getMat(); dst = _dst.getMat();
/* /**/
#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
/*
if( code == CV_BGR2GRAY ) if( code == CV_BGR2GRAY )
{ {
if( CvtColorIPPLoop(src, dst, IPPColor2GrayFunctor(ippiColor2GrayC3Tab[depth])) ) if( CvtColorIPPLoop(src, dst, IPPColor2GrayFunctor(ippiColor2GrayC3Tab[depth])) )
return; return;
} }
else if( code == CV_RGB2GRAY ) else
*/
if( code == CV_RGB2GRAY )
{ {
if( CvtColorIPPLoop(src, dst, IPPGeneralFunctor(ippiRGB2GrayC3Tab[depth])) ) if( CvtColorIPPLoop(src, dst, IPPGeneralFunctor(ippiRGB2GrayC3Tab[depth])) )
return; return;
@ -3333,7 +3341,7 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
return; return;
} }
#endif #endif
*/ /**/
bidx = code == CV_BGR2GRAY || code == CV_BGRA2GRAY ? 0 : 2; bidx = code == CV_BGR2GRAY || code == CV_BGRA2GRAY ? 0 : 2;
if( depth == CV_8U ) if( depth == CV_8U )

View File

@ -212,71 +212,71 @@ static bool IPPDerivScharr(const Mat& src, Mat& dst, int ddepth, int dx, int dy,
{ {
if ((dx == 1) && (dy == 0)) if ((dx == 1) && (dy == 0))
{ {
ippiFilterScharrVertGetBufferSize_8u16s_C1R(roi,&bufSize); if (0 > ippiFilterScharrVertGetBufferSize_8u16s_C1R(roi,&bufSize))
return false;
buffer.allocate(bufSize); buffer.allocate(bufSize);
return (0 <= ippiFilterScharrVertBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step,
ippiFilterScharrVertBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step, (Ipp16s*)dst.data, (int)dst.step, roi, ippBorderRepl, 0, (Ipp8u*)(char*)buffer));
(Ipp16s*)dst.data, (int)dst.step, roi, ippBorderRepl, 0, (Ipp8u*)(char*)buffer);
return true;
} }
if ((dx == 0) && (dy == 1)) if ((dx == 0) && (dy == 1))
{ {
ippiFilterScharrHorizGetBufferSize_8u16s_C1R(roi,&bufSize); if (0 > ippiFilterScharrHorizGetBufferSize_8u16s_C1R(roi,&bufSize))
return false;
buffer.allocate(bufSize); buffer.allocate(bufSize);
return (0 <= ippiFilterScharrHorizBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step,
ippiFilterScharrHorizBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step, (Ipp16s*)dst.data, (int)dst.step, roi, ippBorderRepl, 0, (Ipp8u*)(char*)buffer));
(Ipp16s*)dst.data, (int)dst.step, roi, ippBorderRepl, 0, (Ipp8u*)(char*)buffer);
return true;
} }
return false;
} }
default: default:
return false; return false;
} }
} }
case CV_32F: case CV_32F:
{ {
switch(dst.type()) switch(dst.type())
{ {
case CV_32F: case CV_32F:
{
if ((dx == 1) && (dy == 0)) if ((dx == 1) && (dy == 0))
{ {
ippiFilterScharrVertGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows),&bufSize); if (0 > ippiFilterScharrVertGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows),&bufSize))
return false;
buffer.allocate(bufSize); buffer.allocate(bufSize);
ippiFilterScharrVertBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step, if (0 > ippiFilterScharrVertBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step,
(Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows),
ippBorderRepl, 0, (Ipp8u*)(char*)buffer); ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
if(scale != 1) {
/* IPP is fast, so MulC produce very little perf degradation */ return false;
ippiMulC_32f_C1IR((Ipp32f)scale, (Ipp32f*)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
return true;
} }
if (scale != 1)
/* IPP is fast, so MulC produce very little perf degradation.*/
//ippiMulC_32f_C1IR((Ipp32f)scale, (Ipp32f*)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
ippiMulC_32f_C1R((Ipp32f*)dst.data, (int)dst.step, (Ipp32f)scale, (Ipp32f*)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
return true;
}
if ((dx == 0) && (dy == 1)) if ((dx == 0) && (dy == 1))
{ {
ippiFilterScharrHorizGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows),&bufSize); if (0 > ippiFilterScharrHorizGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows),&bufSize))
return false;
buffer.allocate(bufSize); buffer.allocate(bufSize);
ippiFilterScharrHorizBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step, if (0 > ippiFilterScharrHorizBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step,
(Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows),
ippBorderRepl, 0, (Ipp8u*)(char*)buffer); ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
if(scale != 1) return false;
ippiMulC_32f_C1IR((Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
if (scale != 1)
ippiMulC_32f_C1R((Ipp32f *)dst.data, (int)dst.step, (Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
return true; return true;
} }
}
default: default:
return false; return false;
} }
} }
default: default:
return false; return false;
} }
@ -287,7 +287,6 @@ static bool IPPDeriv(const Mat& src, Mat& dst, int ddepth, int dx, int dy, int k
{ {
int bufSize = 0; int bufSize = 0;
cv::AutoBuffer<char> buffer; cv::AutoBuffer<char> buffer;
if (ksize == 3 || ksize == 5) if (ksize == 3 || ksize == 5)
{ {
if ( ddepth < 0 ) if ( ddepth < 0 )
@ -297,49 +296,46 @@ static bool IPPDeriv(const Mat& src, Mat& dst, int ddepth, int dx, int dy, int k
{ {
if ((dx == 1) && (dy == 0)) if ((dx == 1) && (dy == 0))
{ {
ippiFilterSobelNegVertGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize); if (0 > ippiFilterSobelNegVertGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize))
return false;
buffer.allocate(bufSize); buffer.allocate(bufSize);
ippiFilterSobelNegVertBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step, return (0 <= ippiFilterSobelNegVertBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step,
(Ipp16s*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), (Ipp16s*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
ippBorderRepl, 0, (Ipp8u*)(char*)buffer); ippBorderRepl, 0, (Ipp8u*)(char*)buffer));
return true;
} }
if ((dx == 0) && (dy == 1)) if ((dx == 0) && (dy == 1))
{ {
ippiFilterSobelHorizGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize); if (0 > ippiFilterSobelHorizGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize))
return false;
buffer.allocate(bufSize); buffer.allocate(bufSize);
ippiFilterSobelHorizBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step, return (0 <= ippiFilterSobelHorizBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step,
(Ipp16s*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), (Ipp16s*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
ippBorderRepl, 0, (Ipp8u*)(char*)buffer); ippBorderRepl, 0, (Ipp8u*)(char*)buffer));
return true;
} }
if ((dx == 2) && (dy == 0)) if ((dx == 2) && (dy == 0))
{ {
ippiFilterSobelVertSecondGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize); if (0 > ippiFilterSobelVertSecondGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize))
return false;
buffer.allocate(bufSize); buffer.allocate(bufSize);
ippiFilterSobelVertSecondBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step, return (0 <= ippiFilterSobelVertSecondBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step,
(Ipp16s*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), (Ipp16s*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
ippBorderRepl, 0, (Ipp8u*)(char*)buffer); ippBorderRepl, 0, (Ipp8u*)(char*)buffer));
return true;
} }
if ((dx == 0) && (dy == 2)) if ((dx == 0) && (dy == 2))
{ {
ippiFilterSobelHorizSecondGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize); if (0 > ippiFilterSobelHorizSecondGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize))
return false;
buffer.allocate(bufSize); buffer.allocate(bufSize);
ippiFilterSobelHorizSecondBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step, return (0 <= ippiFilterSobelHorizSecondBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step,
(Ipp16s*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), (Ipp16s*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
ippBorderRepl, 0, (Ipp8u*)(char*)buffer); ippBorderRepl, 0, (Ipp8u*)(char*)buffer));
return true;
} }
} }
@ -347,57 +343,70 @@ static bool IPPDeriv(const Mat& src, Mat& dst, int ddepth, int dx, int dy, int k
{ {
if ((dx == 1) && (dy == 0)) if ((dx == 1) && (dy == 0))
{ {
ippiFilterSobelNegVertGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), &bufSize); if (0 > ippiFilterSobelNegVertGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), &bufSize))
return false;
buffer.allocate(bufSize); buffer.allocate(bufSize);
ippiFilterSobelNegVertBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step, if (0 > ippiFilterSobelNegVertBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step,
(Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
ippBorderRepl, 0, (Ipp8u*)(char*)buffer); ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
{
return false;
}
if(scale != 1) if(scale != 1)
ippiMulC_32f_C1IR((Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); ippiMulC_32f_C1R((Ipp32f *)dst.data, (int)dst.step, (Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
return true; return true;
} }
if ((dx == 0) && (dy == 1)) if ((dx == 0) && (dy == 1))
{ {
ippiFilterSobelHorizGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize); if (0 > ippiFilterSobelHorizGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize))
return false;
buffer.allocate(bufSize); buffer.allocate(bufSize);
ippiFilterSobelHorizBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step, if (0 > ippiFilterSobelHorizBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step,
(Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
ippBorderRepl, 0, (Ipp8u*)(char*)buffer); ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
{
return false;
}
if(scale != 1) if(scale != 1)
ippiMulC_32f_C1IR((Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); ippiMulC_32f_C1R((Ipp32f *)dst.data, (int)dst.step, (Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
return true; return true;
} }
if((dx == 2) && (dy == 0)) if((dx == 2) && (dy == 0))
{ {
ippiFilterSobelVertSecondGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize); if (0 > ippiFilterSobelVertSecondGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize))
return false;
buffer.allocate(bufSize); buffer.allocate(bufSize);
ippiFilterSobelVertSecondBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step, if (0 > ippiFilterSobelVertSecondBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step,
(Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
ippBorderRepl, 0, (Ipp8u*)(char*)buffer); ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
{
return false;
}
if(scale != 1) if(scale != 1)
ippiMulC_32f_C1IR((Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); ippiMulC_32f_C1R((Ipp32f *)dst.data, (int)dst.step, (Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
return true; return true;
} }
if((dx == 0) && (dy == 2)) if((dx == 0) && (dy == 2))
{ {
ippiFilterSobelHorizSecondGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize); if (0 > ippiFilterSobelHorizSecondGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize))
return false;
buffer.allocate(bufSize); buffer.allocate(bufSize);
ippiFilterSobelHorizSecondBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step, if (0 > ippiFilterSobelHorizSecondBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step,
(Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
ippBorderRepl, 0, (Ipp8u*)(char*)buffer); ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
if(scale != 1) {
ippiMulC_32f_C1IR((Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); return false;
}
if(scale != 1)
ippiMulC_32f_C1R((Ipp32f *)dst.data, (int)dst.step, (Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
return true; return true;
} }
} }
@ -405,7 +414,6 @@ static bool IPPDeriv(const Mat& src, Mat& dst, int ddepth, int dx, int dy, int k
if(ksize <= 0) if(ksize <= 0)
return IPPDerivScharr(src, dst, ddepth, dx, dy, scale); return IPPDerivScharr(src, dst, ddepth, dx, dy, scale);
return false; return false;
} }

View File

@ -1420,36 +1420,16 @@ struct RowVec_32f
int operator()(const uchar* _src, uchar* _dst, int width, int cn) const int operator()(const uchar* _src, uchar* _dst, int width, int cn) const
{ {
#ifdef USE_IPP_SEP_FILTERS
int ret = ippiOperator(_src, _dst, width, cn);
if (ret > 0)
return ret;
#endif
int _ksize = kernel.rows + kernel.cols - 1; int _ksize = kernel.rows + kernel.cols - 1;
const float* src0 = (const float*)_src; const float* src0 = (const float*)_src;
float* dst = (float*)_dst; float* dst = (float*)_dst;
const float* _kx = (const float*)kernel.data; const float* _kx = (const float*)kernel.data;
#ifdef USE_IPP_SEP_FILTERS
IppiSize roisz = { width, 1 };
if( (cn == 1 || cn == 3) && width >= _ksize*8 )
{
if( bufsz < 0 )
{
if( (cn == 1 && ippiFilterRowBorderPipelineGetBufferSize_32f_C1R(roisz, _ksize, &bufsz) < 0) ||
(cn == 3 && ippiFilterRowBorderPipelineGetBufferSize_32f_C3R(roisz, _ksize, &bufsz) < 0))
return 0;
}
AutoBuffer<uchar> buf(bufsz + 64);
uchar* bufptr = alignPtr((uchar*)buf, 32);
int step = (int)(width*sizeof(dst[0])*cn);
float borderValue[] = {0.f, 0.f, 0.f};
// here is the trick. IPP needs border type and extrapolates the row. We did it already.
// So we pass anchor=0 and ignore the right tail of results since they are incorrect there.
if( (cn == 1 && ippiFilterRowBorderPipeline_32f_C1R(src0, step, &dst, roisz, _kx, _ksize, 0,
ippBorderRepl, borderValue[0], bufptr) < 0) ||
(cn == 3 && ippiFilterRowBorderPipeline_32f_C3R(src0, step, &dst, roisz, _kx, _ksize, 0,
ippBorderRepl, borderValue, bufptr) < 0))
return 0;
return width - _ksize + 1;
}
#endif
if( !haveSSE ) if( !haveSSE )
return 0; return 0;
@ -1479,7 +1459,38 @@ struct RowVec_32f
Mat kernel; Mat kernel;
bool haveSSE; bool haveSSE;
#ifdef USE_IPP_SEP_FILTERS #ifdef USE_IPP_SEP_FILTERS
private:
mutable int bufsz; mutable int bufsz;
int ippiOperator(const uchar* _src, uchar* _dst, int width, int cn) const
{
int _ksize = kernel.rows + kernel.cols - 1;
if ((1 != cn && 3 != cn) || width < _ksize*8)
return 0;
const float* src = (const float*)_src;
float* dst = (float*)_dst;
const float* _kx = (const float*)kernel.data;
IppiSize roisz = { width, 1 };
if( bufsz < 0 )
{
if( (cn == 1 && ippiFilterRowBorderPipelineGetBufferSize_32f_C1R(roisz, _ksize, &bufsz) < 0) ||
(cn == 3 && ippiFilterRowBorderPipelineGetBufferSize_32f_C3R(roisz, _ksize, &bufsz) < 0))
return 0;
}
AutoBuffer<uchar> buf(bufsz + 64);
uchar* bufptr = alignPtr((uchar*)buf, 32);
int step = (int)(width*sizeof(dst[0])*cn);
float borderValue[] = {0.f, 0.f, 0.f};
// here is the trick. IPP needs border type and extrapolates the row. We did it already.
// So we pass anchor=0 and ignore the right tail of results since they are incorrect there.
if( (cn == 1 && ippiFilterRowBorderPipeline_32f_C1R(src, step, &dst, roisz, _kx, _ksize, 0,
ippBorderRepl, borderValue[0], bufptr) < 0) ||
(cn == 3 && ippiFilterRowBorderPipeline_32f_C3R(src, step, &dst, roisz, _kx, _ksize, 0,
ippBorderRepl, borderValue, bufptr) < 0))
return 0;
return width - _ksize + 1;
}
#endif #endif
}; };

View File

@ -1971,7 +1971,7 @@ public:
CHECK_IPP_STATUS(getBufferSizeFunc(pSpec, dstSize, cn, &bufsize)); CHECK_IPP_STATUS(getBufferSizeFunc(pSpec, dstSize, cn, &bufsize));
CHECK_IPP_STATUS(getSrcOffsetFunc(pSpec, dstOffset, &srcOffset)); CHECK_IPP_STATUS(getSrcOffsetFunc(pSpec, dstOffset, &srcOffset));
Ipp8u* pSrc = (Ipp8u*)src.data + (int)src.step[0] * srcOffset.y + srcOffset.x * cn * itemSize; const Ipp8u* pSrc = (const Ipp8u*)src.data + (int)src.step[0] * srcOffset.y + srcOffset.x * cn * itemSize;
Ipp8u* pDst = (Ipp8u*)dst.data + (int)dst.step[0] * dstOffset.y + dstOffset.x * cn * itemSize; Ipp8u* pDst = (Ipp8u*)dst.data + (int)dst.step[0] * dstOffset.y + dstOffset.x * cn * itemSize;
AutoBuffer<uchar> buf(bufsize + 64); AutoBuffer<uchar> buf(bufsize + 64);
@ -1980,7 +1980,6 @@ public:
if( func( pSrc, (int)src.step[0], pDst, (int)dst.step[0], dstOffset, dstSize, ippBorderRepl, 0, pSpec, bufptr ) < 0 ) if( func( pSrc, (int)src.step[0], pDst, (int)dst.step[0], dstOffset, dstSize, ippBorderRepl, 0, pSpec, bufptr ) < 0 )
*ok = false; *ok = false;
} }
private: private:
const Mat & src; const Mat & src;
Mat & dst; Mat & dst;

View File

@ -1136,7 +1136,7 @@ private:
Scalar borderValue; Scalar borderValue;
}; };
#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 8) && (IPP_VERSION_MINOR >= 1)
static bool IPPMorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kernel, static bool IPPMorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kernel,
const Size& ksize, const Point &anchor, bool rectKernel) const Size& ksize, const Point &anchor, bool rectKernel)
{ {
@ -1148,68 +1148,116 @@ static bool IPPMorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kerne
src.copyTo(temp); src.copyTo(temp);
_src = &temp; _src = &temp;
} }
//DEPRECATED. Allocates and initializes morphology state structure for erosion or dilation operation.
typedef IppStatus (CV_STDCALL* ippiMorphologyInitAllocFunc)(int, const void*, IppiSize, IppiPoint, IppiMorphState **);
typedef IppStatus (CV_STDCALL* ippiMorphologyBorderReplicateFunc)(const void*, int, void *, int,
IppiSize, IppiBorderType, IppiMorphState *);
typedef IppStatus (CV_STDCALL* ippiFilterMinMaxGetBufferSizeFunc)(int, IppiSize, int*);
typedef IppStatus (CV_STDCALL* ippiFilterMinMaxBorderReplicateFunc)(const void*, int, void*, int,
IppiSize, IppiSize, IppiPoint, void*);
ippiMorphologyInitAllocFunc initAllocFunc = 0;
ippiMorphologyBorderReplicateFunc morphFunc = 0;
ippiFilterMinMaxGetBufferSizeFunc getBufSizeFunc = 0;
ippiFilterMinMaxBorderReplicateFunc morphRectFunc = 0;
#define IPP_MORPH_CASE(type, flavor) \
case type: \
initAllocFunc = (ippiMorphologyInitAllocFunc)ippiMorphologyInitAlloc_##flavor; \
morphFunc = op == MORPH_ERODE ? (ippiMorphologyBorderReplicateFunc)ippiErodeBorderReplicate_##flavor : \
(ippiMorphologyBorderReplicateFunc)ippiDilateBorderReplicate_##flavor; \
getBufSizeFunc = (ippiFilterMinMaxGetBufferSizeFunc)ippiFilterMinGetBufferSize_##flavor; \
morphRectFunc = op == MORPH_ERODE ? (ippiFilterMinMaxBorderReplicateFunc)ippiFilterMinBorderReplicate_##flavor : \
(ippiFilterMinMaxBorderReplicateFunc)ippiFilterMaxBorderReplicate_##flavor; \
break
switch( type )
{
IPP_MORPH_CASE(CV_8UC1, 8u_C1R);
IPP_MORPH_CASE(CV_8UC3, 8u_C3R);
IPP_MORPH_CASE(CV_8UC4, 8u_C4R);
IPP_MORPH_CASE(CV_32FC1, 32f_C1R);
IPP_MORPH_CASE(CV_32FC3, 32f_C3R);
IPP_MORPH_CASE(CV_32FC4, 32f_C4R);
default:
return false;
}
#undef IPP_MORPH_CASE
IppiSize roiSize = {src.cols, src.rows}; IppiSize roiSize = {src.cols, src.rows};
IppiSize kernelSize = {ksize.width, ksize.height}; IppiSize kernelSize = {ksize.width, ksize.height};
if (!rectKernel)
{
#if 1
if (((kernel.cols - 1) / 2 != anchor.x) || ((kernel.rows - 1) / 2 != anchor.y))
return false;
#define IPP_MORPH_CASE(cvtype, flavor, data_type) \
case cvtype: \
{\
int specSize = 0, bufferSize = 0;\
if (0 > ippiMorphologyBorderGetSize_##flavor(roiSize.width, kernelSize, &specSize, &bufferSize))\
return false;\
IppiMorphState *pSpec = (IppiMorphState*)ippMalloc(specSize);\
Ipp8u *pBuffer = (Ipp8u*)ippMalloc(bufferSize);\
if (0 > ippiMorphologyBorderInit_##flavor(roiSize.width, kernel.data, kernelSize, pSpec, pBuffer))\
{\
ippFree(pBuffer);\
ippFree(pSpec);\
return false;\
}\
bool ok = false;\
if (op == MORPH_ERODE)\
ok = (0 <= ippiErodeBorder_##flavor((Ipp##data_type *)_src->data, (int)_src->step[0], (Ipp##data_type *)dst.data, (int)dst.step[0],\
roiSize, ippBorderRepl, 0, pSpec, pBuffer));\
else\
ok = (0 <= ippiDilateBorder_##flavor((Ipp##data_type *)_src->data, (int)_src->step[0], (Ipp##data_type *)dst.data, (int)dst.step[0],\
roiSize, ippBorderRepl, 0, pSpec, pBuffer));\
ippFree(pBuffer);\
ippFree(pSpec);\
return ok;\
}\
break;
#else
IppiPoint point = {anchor.x, anchor.y};
// This is the case that can be used when the anchor is not at the center of the kernel, but
// ippiMorphologyBorderGetSize_, ippiErodeBorderReplicate_ and ippiDilateBorderReplicate_ are deprecated.
#define IPP_MORPH_CASE(cvtype, flavor, data_type) \
case cvtype: \
{\
int specSize = 0;\
int bufferSize = 0;\
if (0 > ippiMorphologyGetSize_##flavor( roiSize.width, kernel.data kernelSize, &specSize))\
return false;\
bool ok = false;\
IppiMorphState* pState = (IppiMorphState*)ippMalloc(specSize);\
if (ippiMorphologyInit_##flavor(roiSize.width, kernel.data, kernelSize, point, pState) >= 0)\
{\
if (op == MORPH_ERODE)\
ok = ippiErodeBorderReplicate_##flavor((Ipp##data_type *)_src->data, (int)_src->step[0],\
(Ipp##data_type *)dst.data, (int)dst.step[0],\
roiSize, ippBorderRepl, pState ) >= 0;\
else\
ok = ippiDilateBorderReplicate_##flavor((Ipp##data_type *)_src->data, (int)_src->step[0],\
(Ipp##data_type *)dst.data, (int)dst.step[0],\
roiSize, ippBorderRepl, pState ) >= 0;\
}\
ippFree(pState);\
return ok;\
}\
break;
#endif
switch (type)
{
IPP_MORPH_CASE(CV_8UC1, 8u_C1R, 8u);
IPP_MORPH_CASE(CV_8UC3, 8u_C3R, 8u);
IPP_MORPH_CASE(CV_8UC4, 8u_C4R, 8u);
IPP_MORPH_CASE(CV_32FC1, 32f_C1R, 32f);
IPP_MORPH_CASE(CV_32FC3, 32f_C3R, 32f);
IPP_MORPH_CASE(CV_32FC4, 32f_C4R, 32f);
default:
return false;
}
#undef IPP_MORPH_CASE
}
else
{
IppiPoint point = {anchor.x, anchor.y}; IppiPoint point = {anchor.x, anchor.y};
if( !rectKernel && morphFunc && initAllocFunc ) #define IPP_MORPH_CASE(cvtype, flavor, data_type) \
case cvtype: \
{\
int bufSize = 0;\
if (0 > ippiFilterMinGetBufferSize_##flavor(src.cols, kernelSize, &bufSize))\
return false;\
AutoBuffer<uchar> buf(bufSize + 64);\
uchar* buffer = alignPtr((uchar*)buf, 32);\
if (op == MORPH_ERODE)\
return (0 <= ippiFilterMinBorderReplicate_##flavor((Ipp##data_type *)_src->data, (int)_src->step[0], (Ipp##data_type *)dst.data, (int)dst.step[0], roiSize, kernelSize, point, buffer));\
return (0 <= ippiFilterMaxBorderReplicate_##flavor((Ipp##data_type *)_src->data, (int)_src->step[0], (Ipp##data_type *)dst.data, (int)dst.step[0], roiSize, kernelSize, point, buffer));\
}\
break;
switch (type)
{ {
IppiMorphState* pState; IPP_MORPH_CASE(CV_8UC1, 8u_C1R, 8u);
if( initAllocFunc( roiSize.width, kernel.data, kernelSize, point, &pState ) < 0 ) IPP_MORPH_CASE(CV_8UC3, 8u_C3R, 8u);
IPP_MORPH_CASE(CV_8UC4, 8u_C4R, 8u);
IPP_MORPH_CASE(CV_32FC1, 32f_C1R, 32f);
IPP_MORPH_CASE(CV_32FC3, 32f_C3R, 32f);
IPP_MORPH_CASE(CV_32FC4, 32f_C4R, 32f);
default:
return false; return false;
bool is_ok = morphFunc( _src->data, (int)_src->step[0],
dst.data, (int)dst.step[0],
roiSize, ippBorderRepl, pState ) >= 0;
ippiMorphologyFree(pState);
return is_ok;
} }
else if( rectKernel && morphRectFunc && getBufSizeFunc )
{ #undef IPP_MORPH_CASE
int bufSize = 0;
if( getBufSizeFunc( src.cols, kernelSize, &bufSize) < 0 )
return false;
AutoBuffer<uchar> buf(bufSize + 64);
uchar* buffer = alignPtr((uchar*)buf, 32);
return morphRectFunc(_src->data, (int)_src->step[0], dst.data, (int)dst.step[0],
roiSize, kernelSize, point, buffer) >= 0;
} }
return false;
} }
static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst, static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst,
@ -1411,7 +1459,7 @@ static void morphOp( int op, InputArray _src, OutputArray _dst,
Size ksize = kernel.data ? kernel.size() : Size(3,3); Size ksize = kernel.data ? kernel.size() : Size(3,3);
anchor = normalizeAnchor(anchor, ksize); anchor = normalizeAnchor(anchor, ksize);
#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 8) && (IPP_VERSION_MINOR >= 1)
if( IPPMorphOp(op, _src, _dst, kernel, anchor, iterations, borderType, borderValue) ) if( IPPMorphOp(op, _src, _dst, kernel, anchor, iterations, borderType, borderValue) )
return; return;
#endif #endif

View File

@ -1109,21 +1109,28 @@ void cv::GaussianBlur( InputArray _src, OutputArray _dst, Size ksize,
return; return;
#endif #endif
#if defined HAVE_IPP && (IPP_VERSION_MAJOR >= 7) #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 8) && (IPP_VERSION_MINOR >= 1)
if( type == CV_32FC1 && sigma1 == sigma2 && ksize.width == ksize.height && sigma1 != 0.0 ) if( type == CV_32FC1 && sigma1 == sigma2 && ksize.width == ksize.height && sigma1 != 0.0 )
{ {
Mat src = _src.getMat(), dst = _dst.getMat(); Mat src = _src.getMat(), dst = _dst.getMat();
IppiSize roi = { src.cols, src.rows }; IppiSize roi = { src.cols, src.rows };
int bufSize = 0; int specSize = 0, bufferSize = 0;
ippiFilterGaussGetBufferSize_32f_C1R(roi, ksize.width, &bufSize); if (0 <= ippiFilterGaussianGetBufferSize(roi, (Ipp32u)ksize.width, ipp32f, 1, &specSize, &bufferSize))
AutoBuffer<uchar> buf(bufSize+128); {
if( ippiFilterGaussBorder_32f_C1R((const Ipp32f *)src.data, (int)src.step, IppFilterGaussianSpec *pSpec = (IppFilterGaussianSpec*)ippMalloc(specSize);
Ipp8u *pBuffer = (Ipp8u*)ippMalloc(bufferSize);
if (0 <= ippiFilterGaussianInit(roi, (Ipp32u)ksize.width, (Ipp32f)sigma1, (IppiBorderType)borderType, ipp32f, 1, pSpec, pBuffer))
{
IppStatus sts = ippiFilterGaussianBorder_32f_C1R( (const Ipp32f *)src.data, (int)src.step,
(Ipp32f *)dst.data, (int)dst.step, (Ipp32f *)dst.data, (int)dst.step,
roi, ksize.width, (Ipp32f)sigma1, roi, 0.0, pSpec, pBuffer);
(IppiBorderType)borderType, 0.0, ippFree(pBuffer);
alignPtr(&buf[0],32)) >= 0 ) ippFree(pSpec);
if (0 <= sts)
return; return;
} }
}
}
#endif #endif
Mat kx, ky; Mat kx, ky;
@ -2180,11 +2187,19 @@ public:
IppiSize kernel = {d, d}; IppiSize kernel = {d, d};
IppiSize roi={dst.cols, range.end - range.start}; IppiSize roi={dst.cols, range.end - range.start};
int bufsize=0; int bufsize=0;
ippiFilterBilateralGetBufSize_8u_C1R( ippiFilterBilateralGauss, roi, kernel, &bufsize); if (0 > ippiFilterBilateralGetBufSize_8u_C1R( ippiFilterBilateralGauss, roi, kernel, &bufsize))
{
*ok = false;
return;
}
AutoBuffer<uchar> buf(bufsize); AutoBuffer<uchar> buf(bufsize);
IppiFilterBilateralSpec *pSpec = (IppiFilterBilateralSpec *)alignPtr(&buf[0], 32); IppiFilterBilateralSpec *pSpec = (IppiFilterBilateralSpec *)alignPtr(&buf[0], 32);
ippiFilterBilateralInit_8u_C1R( ippiFilterBilateralGauss, kernel, (Ipp32f)sigma_color, (Ipp32f)sigma_space, 1, pSpec ); if (0 > ippiFilterBilateralInit_8u_C1R( ippiFilterBilateralGauss, kernel, (Ipp32f)sigma_color, (Ipp32f)sigma_space, 1, pSpec ))
if( ippiFilterBilateral_8u_C1R( src.ptr<uchar>(range.start) + radius * ((int)src.step[0] + 1), (int)src.step[0], dst.ptr<uchar>(range.start), (int)dst.step[0], roi, kernel, pSpec ) < 0) {
*ok = false;
return;
}
if (0 > ippiFilterBilateral_8u_C1R( src.ptr<uchar>(range.start) + radius * ((int)src.step[0] + 1), (int)src.step[0], dst.ptr<uchar>(range.start), (int)dst.step[0], roi, kernel, pSpec ))
*ok = false; *ok = false;
} }
private: private:

View File

@ -365,29 +365,31 @@ void cv::integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, Output
#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
if( ( depth == CV_8U ) && ( sdepth == CV_32F || sdepth == CV_32S ) && ( !_tilted.needed() ) && ( !_sqsum.needed() || sqdepth == CV_64F ) && ( cn == 1 ) ) if( ( depth == CV_8U ) && ( sdepth == CV_32F || sdepth == CV_32S ) && ( !_tilted.needed() ) && ( !_sqsum.needed() || sqdepth == CV_64F ) && ( cn == 1 ) )
{ {
IppStatus status = ippStsErr;
IppiSize srcRoiSize = ippiSize( src.cols, src.rows ); IppiSize srcRoiSize = ippiSize( src.cols, src.rows );
if( sdepth == CV_32F ) if( sdepth == CV_32F )
{ {
if( _sqsum.needed() ) if( _sqsum.needed() )
{ {
ippiSqrIntegral_8u32f64f_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32f*)sum.data, (int)sum.step, (Ipp64f*)sqsum.data, (int)sqsum.step, srcRoiSize, 0, 0 ); status = ippiSqrIntegral_8u32f64f_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32f*)sum.data, (int)sum.step, (Ipp64f*)sqsum.data, (int)sqsum.step, srcRoiSize, 0, 0 );
} }
else else
{ {
ippiIntegral_8u32f_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32f*)sum.data, (int)sum.step, srcRoiSize, 0 ); status = ippiIntegral_8u32f_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32f*)sum.data, (int)sum.step, srcRoiSize, 0 );
} }
} }
else if( sdepth == CV_32S ) else if( sdepth == CV_32S )
{ {
if( _sqsum.needed() ) if( _sqsum.needed() )
{ {
ippiSqrIntegral_8u32s64f_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32s*)sum.data, (int)sum.step, (Ipp64f*)sqsum.data, (int)sqsum.step, srcRoiSize, 0, 0 ); status = ippiSqrIntegral_8u32s64f_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32s*)sum.data, (int)sum.step, (Ipp64f*)sqsum.data, (int)sqsum.step, srcRoiSize, 0, 0 );
} }
else else
{ {
ippiIntegral_8u32s_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32s*)sum.data, (int)sum.step, srcRoiSize, 0 ); status = ippiIntegral_8u32s_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32s*)sum.data, (int)sum.step, srcRoiSize, 0 );
} }
} }
if (0 <= status)
return; return;
} }
#endif #endif

View File

@ -1318,9 +1318,9 @@ public:
if( cascade->hid_cascade->ipp_stages ) if( cascade->hid_cascade->ipp_stages )
{ {
IppiRect iequRect = {equRect.x, equRect.y, equRect.width, equRect.height}; IppiRect iequRect = {equRect.x, equRect.y, equRect.width, equRect.height};
ippiRectStdDev_32f_C1R(sum1.ptr<float>(y1), sum1.step, ippiRectStdDev_32f_C1R(sum1.ptr<float>(y1), (int)sum1.step,
sqsum1.ptr<double>(y1), sqsum1.step, sqsum1.ptr<double>(y1), (int)sqsum1.step,
norm1->ptr<float>(y1), norm1->step, norm1->ptr<float>(y1), (int)norm1->step,
ippiSize(ssz.width, ssz.height), iequRect ); ippiSize(ssz.width, ssz.height), iequRect );
int positive = (ssz.width/ystep)*((ssz.height + ystep-1)/ystep); int positive = (ssz.width/ystep)*((ssz.height + ystep-1)/ystep);
@ -1341,9 +1341,9 @@ public:
for( int j = 0; j < cascade->count; j++ ) for( int j = 0; j < cascade->count; j++ )
{ {
if( ippiApplyHaarClassifier_32f_C1R( if( ippiApplyHaarClassifier_32f_C1R(
sum1.ptr<float>(y1), sum1.step, sum1.ptr<float>(y1), (int)sum1.step,
norm1->ptr<float>(y1), norm1->step, norm1->ptr<float>(y1), (int)norm1->step,
mask1->ptr<uchar>(y1), mask1->step, mask1->ptr<uchar>(y1), (int)mask1->step,
ippiSize(ssz.width, ssz.height), &positive, ippiSize(ssz.width, ssz.height), &positive,
cascade->hid_cascade->stage_classifier[j].threshold, cascade->hid_cascade->stage_classifier[j].threshold,
(IppiHaarClassifier_32f*)cascade->hid_cascade->ipp_stages[j]) < 0 ) (IppiHaarClassifier_32f*)cascade->hid_cascade->ipp_stages[j]) < 0 )