diff --git a/modules/core/perf/perf_dft.cpp b/modules/core/perf/perf_dft.cpp index 05ae4bc50..a33bcf51e 100644 --- a/modules/core/perf/perf_dft.cpp +++ b/modules/core/perf/perf_dft.cpp @@ -22,5 +22,5 @@ PERF_TEST_P(Size_MatType, dft, TEST_MATS_DFT) TEST_CYCLE() dft(src, dst); - SANITY_CHECK(dst, 1e-5); + SANITY_CHECK(dst, 1e-5, ERROR_RELATIVE); } diff --git a/modules/core/perf/perf_stat.cpp b/modules/core/perf/perf_stat.cpp index 6b5f0ff52..25bad4f52 100644 --- a/modules/core/perf/perf_stat.cpp +++ b/modules/core/perf/perf_stat.cpp @@ -65,8 +65,8 @@ PERF_TEST_P(Size_MatType, meanStdDev, TYPICAL_MATS) TEST_CYCLE() meanStdDev(src, mean, dev); - SANITY_CHECK(mean, 1e-6); - SANITY_CHECK(dev, 1e-6); + SANITY_CHECK(mean, 1e-5, ERROR_RELATIVE); + SANITY_CHECK(dev, 1e-5, ERROR_RELATIVE); } PERF_TEST_P(Size_MatType, meanStdDev_mask, TYPICAL_MATS) diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp index 17bd34cba..ecc2ca064 100644 --- a/modules/core/src/arithm.cpp +++ b/modules/core/src/arithm.cpp @@ -458,9 +458,12 @@ static void add8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, Size sz, void* ) { - IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); - ippiAdd_8u_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz, 0), - (vBinOp, IF_SIMD(VAdd)>(src1, step1, src2, step2, dst, step, sz))); +#if (ARITHM_USE_IPP == 1) + fixSteps(sz, sizeof(dst[0]), step1, step2, step); + if (0 <= ippiAdd_8u_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz, 0)) + return; +#endif + (vBinOp, IF_SIMD(VAdd)>(src1, step1, src2, step2, dst, step, sz)); } static void add8s( const schar* src1, size_t step1, @@ -474,18 +477,24 @@ static void add16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, Size sz, void* ) { - IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); - ippiAdd_16u_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, 
(IppiSize&)sz, 0), - (vBinOp, IF_SIMD(VAdd)>(src1, step1, src2, step2, dst, step, sz))); +#if (ARITHM_USE_IPP == 1) + fixSteps(sz, sizeof(dst[0]), step1, step2, step); + if (0 <= ippiAdd_16u_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz, 0)) + return; +#endif + (vBinOp, IF_SIMD(VAdd)>(src1, step1, src2, step2, dst, step, sz)); } static void add16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, Size sz, void* ) { - IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); - ippiAdd_16s_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz, 0), - (vBinOp, IF_SIMD(VAdd)>(src1, step1, src2, step2, dst, step, sz))); +#if (ARITHM_USE_IPP == 1) + fixSteps(sz, sizeof(dst[0]), step1, step2, step); + if (0 <= ippiAdd_16s_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz, 0)) + return; +#endif + (vBinOp, IF_SIMD(VAdd)>(src1, step1, src2, step2, dst, step, sz)); } static void add32s( const int* src1, size_t step1, @@ -499,9 +508,12 @@ static void add32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, Size sz, void* ) { - IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); - ippiAdd_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz), - (vBinOp32, IF_SIMD(VAdd)>(src1, step1, src2, step2, dst, step, sz))); +#if (ARITHM_USE_IPP == 1) + fixSteps(sz, sizeof(dst[0]), step1, step2, step); + if (0 <= ippiAdd_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz)) + return; +#endif + (vBinOp32, IF_SIMD(VAdd)>(src1, step1, src2, step2, dst, step, sz)); } static void add64f( const double* src1, size_t step1, @@ -515,9 +527,12 @@ static void sub8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, Size sz, void* ) { - IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); - ippiSub_8u_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, 
(IppiSize&)sz, 0), - (vBinOp, IF_SIMD(VSub)>(src1, step1, src2, step2, dst, step, sz))); +#if (ARITHM_USE_IPP == 1) + fixSteps(sz, sizeof(dst[0]), step1, step2, step); + if (0 <= ippiSub_8u_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz, 0)) + return; +#endif + (vBinOp, IF_SIMD(VSub)>(src1, step1, src2, step2, dst, step, sz)); } static void sub8s( const schar* src1, size_t step1, @@ -531,18 +546,24 @@ static void sub16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, Size sz, void* ) { - IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); - ippiSub_16u_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz, 0), - (vBinOp, IF_SIMD(VSub)>(src1, step1, src2, step2, dst, step, sz))); +#if (ARITHM_USE_IPP == 1) + fixSteps(sz, sizeof(dst[0]), step1, step2, step); + if (0 <= ippiSub_16u_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz, 0)) + return; +#endif + (vBinOp, IF_SIMD(VSub)>(src1, step1, src2, step2, dst, step, sz)); } static void sub16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, Size sz, void* ) { - IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); - ippiSub_16s_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz, 0), - (vBinOp, IF_SIMD(VSub)>(src1, step1, src2, step2, dst, step, sz))); +#if (ARITHM_USE_IPP == 1) + fixSteps(sz, sizeof(dst[0]), step1, step2, step); + if (0 <= ippiSub_16s_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz, 0)) + return; +#endif + (vBinOp, IF_SIMD(VSub)>(src1, step1, src2, step2, dst, step, sz)); } static void sub32s( const int* src1, size_t step1, @@ -556,9 +577,12 @@ static void sub32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, Size sz, void* ) { - IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); - ippiSub_32f_C1R(src2, (int)step2, src1, (int)step1, dst, 
(int)step, (IppiSize&)sz), - (vBinOp32, IF_SIMD(VSub)>(src1, step1, src2, step2, dst, step, sz))); +#if (ARITHM_USE_IPP == 1) + fixSteps(sz, sizeof(dst[0]), step1, step2, step); + if (0 <= ippiSub_32f_C1R(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz)) + return; +#endif + (vBinOp32, IF_SIMD(VSub)>(src1, step1, src2, step2, dst, step, sz)); } static void sub64f( const double* src1, size_t step1, @@ -576,26 +600,23 @@ static void max8u( const uchar* src1, size_t step1, uchar* dst, size_t step, Size sz, void* ) { #if (ARITHM_USE_IPP == 1) - { uchar* s1 = (uchar*)src1; uchar* s2 = (uchar*)src2; uchar* d = dst; fixSteps(sz, sizeof(dst[0]), step1, step2, step); - for(int i = 0; i < sz.height; i++) + int i = 0; + for(; i < sz.height; i++) { - ippsMaxEvery_8u(s1, s2, d, sz.width); - s1 += step1; - s2 += step2; - d += step; + if (0 > ippsMaxEvery_8u(s1, s2, d, sz.width)) + break; + s1 += step1; + s2 += step2; + d += step; } - } -#else - vBinOp, IF_SIMD(VMax)>(src1, step1, src2, step2, dst, step, sz); + if (i == sz.height) + return; #endif - -// IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); -// ippiMaxEvery_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz), -// (vBinOp8, IF_SIMD(_VMax8u)>(src1, step1, src2, step2, dst, step, sz))); + vBinOp, IF_SIMD(VMax)>(src1, step1, src2, step2, dst, step, sz); } static void max8s( const schar* src1, size_t step1, @@ -610,26 +631,23 @@ static void max16u( const ushort* src1, size_t step1, ushort* dst, size_t step, Size sz, void* ) { #if (ARITHM_USE_IPP == 1) - { ushort* s1 = (ushort*)src1; ushort* s2 = (ushort*)src2; ushort* d = dst; fixSteps(sz, sizeof(dst[0]), step1, step2, step); - for(int i = 0; i < sz.height; i++) + int i = 0; + for(; i < sz.height; i++) { - ippsMaxEvery_16u(s1, s2, d, sz.width); - s1 = (ushort*)((uchar*)s1 + step1); - s2 = (ushort*)((uchar*)s2 + step2); - d = (ushort*)((uchar*)d + step); + if (0 > ippsMaxEvery_16u(s1, s2, d, sz.width)) + break; + s1 = (ushort*)((uchar*)s1 
+ step1); + s2 = (ushort*)((uchar*)s2 + step2); + d = (ushort*)((uchar*)d + step); } - } -#else - vBinOp, IF_SIMD(VMax)>(src1, step1, src2, step2, dst, step, sz); + if (i == sz.height) + return; #endif - -// IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); -// ippiMaxEvery_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz), -// (vBinOp16, IF_SIMD(_VMax16u)>(src1, step1, src2, step2, dst, step, sz))); + vBinOp, IF_SIMD(VMax)>(src1, step1, src2, step2, dst, step, sz); } static void max16s( const short* src1, size_t step1, @@ -651,25 +669,23 @@ static void max32f( const float* src1, size_t step1, float* dst, size_t step, Size sz, void* ) { #if (ARITHM_USE_IPP == 1) - { float* s1 = (float*)src1; float* s2 = (float*)src2; float* d = dst; fixSteps(sz, sizeof(dst[0]), step1, step2, step); - for(int i = 0; i < sz.height; i++) + int i = 0; + for(; i < sz.height; i++) { - ippsMaxEvery_32f(s1, s2, d, sz.width); - s1 = (float*)((uchar*)s1 + step1); - s2 = (float*)((uchar*)s2 + step2); - d = (float*)((uchar*)d + step); + if (0 > ippsMaxEvery_32f(s1, s2, d, sz.width)) + break; + s1 = (float*)((uchar*)s1 + step1); + s2 = (float*)((uchar*)s2 + step2); + d = (float*)((uchar*)d + step); } - } -#else - vBinOp32, IF_SIMD(VMax)>(src1, step1, src2, step2, dst, step, sz); + if (i == sz.height) + return; #endif -// IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); -// ippiMaxEvery_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz), -// (vBinOp32f, IF_SIMD(_VMax32f)>(src1, step1, src2, step2, dst, step, sz))); + vBinOp32, IF_SIMD(VMax)>(src1, step1, src2, step2, dst, step, sz); } static void max64f( const double* src1, size_t step1, @@ -684,26 +700,23 @@ static void min8u( const uchar* src1, size_t step1, uchar* dst, size_t step, Size sz, void* ) { #if (ARITHM_USE_IPP == 1) - { uchar* s1 = (uchar*)src1; uchar* s2 = (uchar*)src2; uchar* d = dst; fixSteps(sz, sizeof(dst[0]), step1, step2, step); - for(int i = 0; i < sz.height; i++) + int i = 0; + 
for(; i < sz.height; i++) { - ippsMinEvery_8u(s1, s2, d, sz.width); - s1 += step1; - s2 += step2; - d += step; + if (0 > ippsMinEvery_8u(s1, s2, d, sz.width)) + break; + s1 += step1; + s2 += step2; + d += step; } - } -#else - vBinOp, IF_SIMD(VMin)>(src1, step1, src2, step2, dst, step, sz); + if (i == sz.height) + return; #endif - -// IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); -// ippiMinEvery_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz), -// (vBinOp8, IF_SIMD(_VMin8u)>(src1, step1, src2, step2, dst, step, sz))); + vBinOp, IF_SIMD(VMin)>(src1, step1, src2, step2, dst, step, sz); } static void min8s( const schar* src1, size_t step1, @@ -718,26 +731,23 @@ static void min16u( const ushort* src1, size_t step1, ushort* dst, size_t step, Size sz, void* ) { #if (ARITHM_USE_IPP == 1) - { ushort* s1 = (ushort*)src1; ushort* s2 = (ushort*)src2; ushort* d = dst; fixSteps(sz, sizeof(dst[0]), step1, step2, step); - for(int i = 0; i < sz.height; i++) + int i = 0; + for(; i < sz.height; i++) { - ippsMinEvery_16u(s1, s2, d, sz.width); - s1 = (ushort*)((uchar*)s1 + step1); - s2 = (ushort*)((uchar*)s2 + step2); - d = (ushort*)((uchar*)d + step); + if (0 > ippsMinEvery_16u(s1, s2, d, sz.width)) + break; + s1 = (ushort*)((uchar*)s1 + step1); + s2 = (ushort*)((uchar*)s2 + step2); + d = (ushort*)((uchar*)d + step); } - } -#else - vBinOp, IF_SIMD(VMin)>(src1, step1, src2, step2, dst, step, sz); + if (i == sz.height) + return; #endif - -// IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); -// ippiMinEvery_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz), -// (vBinOp16, IF_SIMD(_VMin16u)>(src1, step1, src2, step2, dst, step, sz))); + vBinOp, IF_SIMD(VMin)>(src1, step1, src2, step2, dst, step, sz); } static void min16s( const short* src1, size_t step1, @@ -759,25 +769,23 @@ static void min32f( const float* src1, size_t step1, float* dst, size_t step, Size sz, void* ) { #if (ARITHM_USE_IPP == 1) - { float* s1 = (float*)src1; float* s2 = 
(float*)src2; float* d = dst; fixSteps(sz, sizeof(dst[0]), step1, step2, step); - for(int i = 0; i < sz.height; i++) + int i = 0; + for(; i < sz.height; i++) { - ippsMinEvery_32f(s1, s2, d, sz.width); - s1 = (float*)((uchar*)s1 + step1); - s2 = (float*)((uchar*)s2 + step2); - d = (float*)((uchar*)d + step); + if (0 > ippsMinEvery_32f(s1, s2, d, sz.width)) + break; + s1 = (float*)((uchar*)s1 + step1); + s2 = (float*)((uchar*)s2 + step2); + d = (float*)((uchar*)d + step); } - } -#else - vBinOp32, IF_SIMD(VMin)>(src1, step1, src2, step2, dst, step, sz); + if (i == sz.height) + return; #endif -// IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); -// ippiMinEvery_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz), -// (vBinOp32f, IF_SIMD(_VMin32f)>(src1, step1, src2, step2, dst, step, sz))); + vBinOp32, IF_SIMD(VMin)>(src1, step1, src2, step2, dst, step, sz); } static void min64f( const double* src1, size_t step1, @@ -791,9 +799,12 @@ static void absdiff8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, Size sz, void* ) { - IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); - ippiAbsDiff_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz), - (vBinOp, IF_SIMD(VAbsDiff)>(src1, step1, src2, step2, dst, step, sz))); +#if (ARITHM_USE_IPP == 1) + fixSteps(sz, sizeof(dst[0]), step1, step2, step); + if (0 <= ippiAbsDiff_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz)) + return; +#endif + (vBinOp, IF_SIMD(VAbsDiff)>(src1, step1, src2, step2, dst, step, sz)); } static void absdiff8s( const schar* src1, size_t step1, @@ -807,9 +818,12 @@ static void absdiff16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, Size sz, void* ) { - IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); - ippiAbsDiff_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz), - (vBinOp, IF_SIMD(VAbsDiff)>(src1, step1, 
src2, step2, dst, step, sz))); +#if (ARITHM_USE_IPP == 1) + fixSteps(sz, sizeof(dst[0]), step1, step2, step); + if (0 <= ippiAbsDiff_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz)) + return; +#endif + (vBinOp, IF_SIMD(VAbsDiff)>(src1, step1, src2, step2, dst, step, sz)); } static void absdiff16s( const short* src1, size_t step1, @@ -830,9 +844,12 @@ static void absdiff32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, Size sz, void* ) { - IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); - ippiAbsDiff_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz), - (vBinOp32, IF_SIMD(VAbsDiff)>(src1, step1, src2, step2, dst, step, sz))); +#if (ARITHM_USE_IPP == 1) + fixSteps(sz, sizeof(dst[0]), step1, step2, step); + if (0 <= ippiAbsDiff_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz)) + return; +#endif + (vBinOp32, IF_SIMD(VAbsDiff)>(src1, step1, src2, step2, dst, step, sz)); } static void absdiff64f( const double* src1, size_t step1, @@ -847,36 +864,48 @@ static void and8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, Size sz, void* ) { - IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); - ippiAnd_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz), - (vBinOp, IF_SIMD(VAnd)>(src1, step1, src2, step2, dst, step, sz))); +#if (ARITHM_USE_IPP == 1) + fixSteps(sz, sizeof(dst[0]), step1, step2, step); + if (0 <= ippiAnd_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz)) + return; +#endif + (vBinOp, IF_SIMD(VAnd)>(src1, step1, src2, step2, dst, step, sz)); } static void or8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, Size sz, void* ) { - IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); - ippiOr_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz), - (vBinOp, IF_SIMD(VOr)>(src1, 
step1, src2, step2, dst, step, sz))); +#if (ARITHM_USE_IPP == 1) + fixSteps(sz, sizeof(dst[0]), step1, step2, step); + if (0 <= ippiOr_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz)) + return; +#endif + (vBinOp, IF_SIMD(VOr)>(src1, step1, src2, step2, dst, step, sz)); } static void xor8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, Size sz, void* ) { - IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); - ippiXor_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz), - (vBinOp, IF_SIMD(VXor)>(src1, step1, src2, step2, dst, step, sz))); +#if (ARITHM_USE_IPP == 1) + fixSteps(sz, sizeof(dst[0]), step1, step2, step); + if (0 <= ippiXor_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz)) + return; +#endif + (vBinOp, IF_SIMD(VXor)>(src1, step1, src2, step2, dst, step, sz)); } static void not8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, Size sz, void* ) { - IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); (void *)src2; - ippiNot_8u_C1R(src1, (int)step1, dst, (int)step, (IppiSize&)sz), - (vBinOp, IF_SIMD(VNot)>(src1, step1, src2, step2, dst, step, sz))); +#if (ARITHM_USE_IPP == 1) + fixSteps(sz, sizeof(dst[0]), step1, step2, step); (void *)src2; + if (0 <= ippiNot_8u_C1R(src1, (int)step1, dst, (int)step, (IppiSize&)sz)) + return; +#endif + (vBinOp, IF_SIMD(VNot)>(src1, step1, src2, step2, dst, step, sz)); } /****************************************************************************************\ @@ -2357,7 +2386,7 @@ static void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t ste if( op >= 0 ) { fixSteps(size, sizeof(dst[0]), step1, step2, step); - if( ippiCompare_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op) >= 0 ) + if (0 <= ippiCompare_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op)) return; } #endif @@ -2440,7 
+2469,7 @@ static void cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t if( op >= 0 ) { fixSteps(size, sizeof(dst[0]), step1, step2, step); - if( ippiCompare_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op) >= 0 ) + if (0 <= ippiCompare_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op)) return; } #endif @@ -2455,7 +2484,7 @@ static void cmp16s(const short* src1, size_t step1, const short* src2, size_t st if( op > 0 ) { fixSteps(size, sizeof(dst[0]), step1, step2, step); - if( ippiCompare_16s_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op) >= 0 ) + if (0 <= ippiCompare_16s_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op)) return; } #endif @@ -2561,7 +2590,7 @@ static void cmp32f(const float* src1, size_t step1, const float* src2, size_t st if( op >= 0 ) { fixSteps(size, sizeof(dst[0]), step1, step2, step); - if( ippiCompare_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op) >= 0 ) + if (0 <= ippiCompare_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op)) return; } #endif diff --git a/modules/core/src/mathfuncs.cpp b/modules/core/src/mathfuncs.cpp index 16df02caf..12ba4fa5b 100644 --- a/modules/core/src/mathfuncs.cpp +++ b/modules/core/src/mathfuncs.cpp @@ -812,8 +812,6 @@ typedef union } DBLINT; -#ifndef HAVE_IPP - #define EXPTAB_SCALE 6 #define EXPTAB_MASK ((1 << EXPTAB_SCALE) - 1) @@ -1275,13 +1273,26 @@ static void Exp_64f( const double *_x, double *y, int n ) #undef EXPTAB_MASK #undef EXPPOLY_32F_A0 -#else +#ifdef HAVE_IPP +static void Exp_32f_ipp(const float *x, float *y, int n) +{ + if (0 <= ippsExp_32f_A21(x, y, n)) + return; + Exp_32f(x, y, n); +} -#define Exp_32f ippsExp_32f_A21 -#define Exp_64f ippsExp_64f_A50 +static void Exp_64f_ipp(const double *x, double *y, int n) +{ + if (0 <= ippsExp_64f_A50(x, y, n)) + return; + Exp_64f(x, y, n); +} +#define Exp_32f 
Exp_32f_ipp +#define Exp_64f Exp_64f_ipp #endif + void exp( InputArray _src, OutputArray _dst ) { int type = _src.type(), depth = _src.depth(), cn = _src.channels(); @@ -1302,9 +1313,9 @@ void exp( InputArray _src, OutputArray _dst ) for( size_t i = 0; i < it.nplanes; i++, ++it ) { if( depth == CV_32F ) - Exp_32f( (const float*)ptrs[0], (float*)ptrs[1], len ); + Exp_32f((const float*)ptrs[0], (float*)ptrs[1], len); else - Exp_64f( (const double*)ptrs[0], (double*)ptrs[1], len ); + Exp_64f((const double*)ptrs[0], (double*)ptrs[1], len); } } @@ -1313,8 +1324,6 @@ void exp( InputArray _src, OutputArray _dst ) * L O G * \****************************************************************************************/ -#ifndef HAVE_IPP - #define LOGTAB_SCALE 8 #define LOGTAB_MASK ((1 << LOGTAB_SCALE) - 1) #define LOGTAB_MASK2 ((1 << (20 - LOGTAB_SCALE)) - 1) @@ -1922,11 +1931,23 @@ static void Log_64f( const double *x, double *y, int n ) } } -#else +#ifdef HAVE_IPP +static void Log_32f_ipp(const float *x, float *y, int n) +{ + if (0 <= ippsLn_32f_A21(x, y, n)) + return; + Log_32f(x, y, n); +} -#define Log_32f ippsLn_32f_A21 -#define Log_64f ippsLn_64f_A50 +static void Log_64f_ipp(const double *x, double *y, int n) +{ + if (0 <= ippsLn_64f_A50(x, y, n)) + return; + Log_64f(x, y, n); +} +#define Log_32f Log_32f_ipp +#define Log_64f Log_64f_ipp #endif void log( InputArray _src, OutputArray _dst ) diff --git a/modules/core/src/matmul.cpp b/modules/core/src/matmul.cpp index daad86a0f..8891bb05f 100644 --- a/modules/core/src/matmul.cpp +++ b/modules/core/src/matmul.cpp @@ -44,10 +44,6 @@ #include "opencl_kernels.hpp" #include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp" -#ifdef HAVE_IPP -#include "ippversion.h" -#endif - namespace cv { @@ -2803,11 +2799,11 @@ static double dotProd_8u(const uchar* src1, const uchar* src2, int len) { double r = 0; #if ARITHM_USE_IPP - ippiDotProd_8u64f_C1R(src1, (int)(len*sizeof(src1[0])), - src2, (int)(len*sizeof(src2[0])), - ippiSize(len, 1), 
&r); - return r; -#else + if (0 <= ippiDotProd_8u64f_C1R(src1, (int)(len*sizeof(src1[0])), + src2, (int)(len*sizeof(src2[0])), + ippiSize(len, 1), &r)) + return r; +#endif int i = 0; #if CV_SSE2 @@ -2853,7 +2849,6 @@ static double dotProd_8u(const uchar* src1, const uchar* src2, int len) } #endif return r + dotProd_(src1, src2, len - i); -#endif } @@ -2864,48 +2859,52 @@ static double dotProd_8s(const schar* src1, const schar* src2, int len) static double dotProd_16u(const ushort* src1, const ushort* src2, int len) { +#if (ARITHM_USE_IPP == 1) double r = 0; - IF_IPP(ippiDotProd_16u64f_C1R(src1, (int)(len*sizeof(src1[0])), - src2, (int)(len*sizeof(src2[0])), - ippiSize(len, 1), &r), - r = dotProd_(src1, src2, len)); - return r; + if (0 <= ippiDotProd_16u64f_C1R(src1, (int)(len*sizeof(src1[0])), src2, (int)(len*sizeof(src2[0])), ippiSize(len, 1), &r)) + return r; +#endif + return dotProd_(src1, src2, len); } static double dotProd_16s(const short* src1, const short* src2, int len) { +#if (ARITHM_USE_IPP == 1) double r = 0; - IF_IPP(ippiDotProd_16s64f_C1R(src1, (int)(len*sizeof(src1[0])), - src2, (int)(len*sizeof(src2[0])), - ippiSize(len, 1), &r), - r = dotProd_(src1, src2, len)); - return r; + if (0 <= ippiDotProd_16s64f_C1R(src1, (int)(len*sizeof(src1[0])), src2, (int)(len*sizeof(src2[0])), ippiSize(len, 1), &r)) + return r; +#endif + return dotProd_(src1, src2, len); } static double dotProd_32s(const int* src1, const int* src2, int len) { +#if (ARITHM_USE_IPP == 1) double r = 0; - IF_IPP(ippiDotProd_32s64f_C1R(src1, (int)(len*sizeof(src1[0])), - src2, (int)(len*sizeof(src2[0])), - ippiSize(len, 1), &r), - r = dotProd_(src1, src2, len)); - return r; + if (0 <= ippiDotProd_32s64f_C1R(src1, (int)(len*sizeof(src1[0])), src2, (int)(len*sizeof(src2[0])), ippiSize(len, 1), &r)) + return r; +#endif + return dotProd_(src1, src2, len); } static double dotProd_32f(const float* src1, const float* src2, int len) { +#if (ARITHM_USE_IPP == 1) double r = 0; - 
IF_IPP(ippsDotProd_32f64f(src1, src2, len, &r), - r = dotProd_(src1, src2, len)); - return r; + if (0 <= ippsDotProd_32f64f(src1, src2, len, &r)) + return r; +#endif + return dotProd_(src1, src2, len); } static double dotProd_64f(const double* src1, const double* src2, int len) { +#if (ARITHM_USE_IPP == 1) double r = 0; - IF_IPP(ippsDotProd_64f(src1, src2, len, &r), - r = dotProd_(src1, src2, len)); - return r; + if (0 <= ippsDotProd_64f(src1, src2, len, &r)) + return r; +#endif + return dotProd_(src1, src2, len); } diff --git a/modules/core/src/precomp.hpp b/modules/core/src/precomp.hpp index ff5943bc6..1c498cd01 100644 --- a/modules/core/src/precomp.hpp +++ b/modules/core/src/precomp.hpp @@ -199,10 +199,8 @@ enum { BLOCK_SIZE = 1024 }; #if defined HAVE_IPP && (IPP_VERSION_MAJOR >= 7) #define ARITHM_USE_IPP 1 -#define IF_IPP(then_call, else_call) then_call #else #define ARITHM_USE_IPP 0 -#define IF_IPP(then_call, else_call) else_call #endif inline bool checkScalar(const Mat& sc, int atype, int sckind, int akind) diff --git a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp index a4605d160..b7e5d3422 100644 --- a/modules/core/src/stat.cpp +++ b/modules/core/src/stat.cpp @@ -43,7 +43,6 @@ #include "precomp.hpp" #include "opencl_kernels.hpp" #include -#include namespace cv { @@ -972,7 +971,9 @@ void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, Input ippiMeanStdDevFuncC1 ippFuncC1 = type == CV_8UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_8u_C1R : type == CV_16UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_16u_C1R : - //type == CV_32FC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_32f_C1R ://Aug 2013: bug in IPP 7.1, 8.0 +#if (IPP_VERSION_MAJOR >= 8) && (IPP_VERSION_MINOR >= 1) + type == CV_32FC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_32f_C1R ://Aug 2013: bug in IPP 7.1, 8.0 +#endif 0; if( ippFuncC1 ) { @@ -2111,8 +2112,10 @@ double cv::norm( InputArray _src, int normType, InputArray _mask ) type == CV_16UC3 ? 
(ippiNormFuncNoHint)ippiNorm_Inf_16u_C3R : type == CV_16UC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_16u_C4R : type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C1R : - //type == CV_16SC3 ? (ippiNormFunc)ippiNorm_Inf_16s_C3R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768 - //type == CV_16SC4 ? (ippiNormFunc)ippiNorm_Inf_16s_C4R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768 +#if (IPP_VERSION_MAJOR >= 8) && (IPP_VERSION_MINOR >= 1) + type == CV_16SC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C3R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768 + type == CV_16SC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C4R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768 +#endif type == CV_32FC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C1R : type == CV_32FC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C3R : type == CV_32FC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C4R : @@ -2541,8 +2544,10 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m type == CV_16UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C3R : type == CV_16UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C4R : type == CV_16SC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C1R : - //type == CV_16SC3 ? (ippiNormDiffFunc)ippiNormDiff_Inf_16s_C3R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768 - //type == CV_16SC4 ? (ippiNormDiffFunc)ippiNormDiff_Inf_16s_C4R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768 +#if (IPP_VERSION_MAJOR >= 8) && (IPP_VERSION_MINOR >= 1) + type == CV_16SC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C3R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768 + type == CV_16SC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C4R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768 +#endif type == CV_32FC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C1R : type == CV_32FC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C3R : type == CV_32FC4 ? 
(ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C4R : diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp index 5b8a69649..4b3efce4a 100644 --- a/modules/core/src/system.cpp +++ b/modules/core/src/system.cpp @@ -397,17 +397,17 @@ int64 getCPUTickCount(void) #else -#ifdef HAVE_IPP -int64 getCPUTickCount(void) -{ - return ippGetCpuClocks(); -} -#else +//#ifdef HAVE_IPP +//int64 getCPUTickCount(void) +//{ +// return ippGetCpuClocks(); +//} +//#else int64 getCPUTickCount(void) { return getTickCount(); } -#endif +//#endif #endif diff --git a/modules/imgproc/doc/feature_detection.rst b/modules/imgproc/doc/feature_detection.rst index de16c0751..2187b8fd8 100644 --- a/modules/imgproc/doc/feature_detection.rst +++ b/modules/imgproc/doc/feature_detection.rst @@ -1,4 +1,4 @@ -Feature Detection +Feature Detection ================= .. highlight:: cpp @@ -15,9 +15,9 @@ Finds edges in an image using the [Canny86]_ algorithm. .. ocv:cfunction:: void cvCanny( const CvArr* image, CvArr* edges, double threshold1, double threshold2, int aperture_size=3 ) - :param image: single-channel 8-bit input image. + :param image: 8-bit input image. - :param edges: output edge map; it has the same size and type as ``image`` . + :param edges: output edge map; single-channel 8-bit image, which has the same size as ``image`` . :param threshold1: first threshold for the hysteresis procedure. 
diff --git a/modules/imgproc/perf/perf_bilateral.cpp b/modules/imgproc/perf/perf_bilateral.cpp index af565f8a5..4ed0c4db6 100644 --- a/modules/imgproc/perf/perf_bilateral.cpp +++ b/modules/imgproc/perf/perf_bilateral.cpp @@ -34,5 +34,5 @@ PERF_TEST_P( TestBilateralFilter, BilateralFilter, TEST_CYCLE() bilateralFilter(src, dst, d, sigmaColor, sigmaSpace, BORDER_DEFAULT); - SANITY_CHECK(dst); + SANITY_CHECK(dst, .01, ERROR_RELATIVE); } diff --git a/modules/imgproc/src/canny.cpp b/modules/imgproc/src/canny.cpp index fbc92dde2..65e817306 100644 --- a/modules/imgproc/src/canny.cpp +++ b/modules/imgproc/src/canny.cpp @@ -42,13 +42,13 @@ #include "precomp.hpp" #include "opencl_kernels.hpp" -/* + #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) #define USE_IPP_CANNY 1 #else #undef USE_IPP_CANNY #endif -*/ + namespace cv { @@ -81,8 +81,8 @@ static bool ippCanny(const Mat& _src, Mat& _dst, float low, float high) return false; if( ippiCanny_16s8u_C1R(_dx.ptr(), (int)_dx.step, - _dy.ptr(), (int)_dy.step, - _dst.data, (int)_dst.step, roi, low, high, buffer) < 0 ) + _dy.ptr(), (int)_dy.step, + _dst.data, (int)_dst.step, roi, low, high, buffer) < 0 ) return false; return true; } @@ -286,7 +286,7 @@ void cv::Canny( InputArray _src, OutputArray _dst, #endif #ifdef USE_IPP_CANNY - if( aperture_size == 3 && !L2gradient && + if( aperture_size == 3 && !L2gradient && 1 == cn && ippCanny(src, dst, (float)low_thresh, (float)high_thresh) ) return; #endif diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp index 3822ab3c1..30b04c238 100644 --- a/modules/imgproc/src/color.cpp +++ b/modules/imgproc/src/color.cpp @@ -252,6 +252,7 @@ bool CvtColorIPPLoopCopy(Mat& src, Mat& dst, const Cvt& cvt) } bool ok; parallel_for_(Range(0, source.rows), CvtColorIPPLoop_Invoker(source, dst, cvt, &ok), source.total()/(double)(1<<16) ); + //ok = cvt(src.ptr(0), (int)src.step[0], dst.ptr(0), (int)dst.step[0], src.cols, src.rows); return ok; } @@ -297,11 +298,13 @@ static ippiReorderFunc 
ippiSwapChannelsC3RTab[] = 0, (ippiReorderFunc)ippiSwapChannels_32f_C3R, 0, 0 }; +#if (IPP_VERSION_MAJOR >= 8) && (IPP_VERSION_MINOR >= 1) static ippiReorderFunc ippiSwapChannelsC4RTab[] = { - (ippiReorderFunc)ippiSwapChannels_8u_AC4R, 0, (ippiReorderFunc)ippiSwapChannels_16u_AC4R, 0, - 0, (ippiReorderFunc)ippiSwapChannels_32f_AC4R, 0, 0 + (ippiReorderFunc)ippiSwapChannels_8u_C4R, 0, (ippiReorderFunc)ippiSwapChannels_16u_C4R, 0, + 0, (ippiReorderFunc)ippiSwapChannels_32f_C4R, 0, 0 }; +#endif static ippiColor2GrayFunc ippiColor2GrayC3Tab[] = { @@ -3251,11 +3254,13 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) if( CvtColorIPPLoopCopy(src, dst, IPPReorderFunctor(ippiSwapChannelsC3RTab[depth], 2, 1, 0)) ) return; } +#if (IPP_VERSION_MAJOR >= 8) && (IPP_VERSION_MINOR >= 1) else if( code == CV_RGBA2BGRA ) { if( CvtColorIPPLoopCopy(src, dst, IPPReorderFunctor(ippiSwapChannelsC4RTab[depth], 2, 1, 0)) ) return; } +#endif #endif if( depth == CV_8U ) @@ -3310,14 +3315,17 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) CV_Assert( scn == 3 || scn == 4 ); _dst.create(sz, CV_MAKETYPE(depth, 1)); dst = _dst.getMat(); -/* +/**/ #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) +/* if( code == CV_BGR2GRAY ) { if( CvtColorIPPLoop(src, dst, IPPColor2GrayFunctor(ippiColor2GrayC3Tab[depth])) ) return; } - else if( code == CV_RGB2GRAY ) + else +*/ + if( code == CV_RGB2GRAY ) { if( CvtColorIPPLoop(src, dst, IPPGeneralFunctor(ippiRGB2GrayC3Tab[depth])) ) return; @@ -3333,7 +3341,7 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) return; } #endif -*/ +/**/ bidx = code == CV_BGR2GRAY || code == CV_BGRA2GRAY ? 
0 : 2; if( depth == CV_8U ) diff --git a/modules/imgproc/src/deriv.cpp b/modules/imgproc/src/deriv.cpp index df2f371f5..0b19f22be 100644 --- a/modules/imgproc/src/deriv.cpp +++ b/modules/imgproc/src/deriv.cpp @@ -190,223 +190,231 @@ namespace cv static bool IPPDerivScharr(const Mat& src, Mat& dst, int ddepth, int dx, int dy, double scale) { - int bufSize = 0; - cv::AutoBuffer buffer; - IppiSize roi = ippiSize(src.cols, src.rows); + int bufSize = 0; + cv::AutoBuffer buffer; + IppiSize roi = ippiSize(src.cols, src.rows); - if( ddepth < 0 ) - ddepth = src.depth(); + if( ddepth < 0 ) + ddepth = src.depth(); - dst.create( src.size(), CV_MAKETYPE(ddepth, src.channels()) ); + dst.create( src.size(), CV_MAKETYPE(ddepth, src.channels()) ); - switch(src.type()) - { - case CV_8U: - { + switch(src.type()) + { + case CV_8U: + { if(scale != 1) return false; switch(dst.type()) { - case CV_16S: - { - if((dx == 1) && (dy == 0)) - { - ippiFilterScharrVertGetBufferSize_8u16s_C1R(roi,&bufSize); - buffer.allocate(bufSize); - - ippiFilterScharrVertBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step, - (Ipp16s*)dst.data, (int)dst.step, roi, ippBorderRepl, 0, (Ipp8u*)(char*)buffer); - - return true; - } - - if((dx == 0) && (dy == 1)) - { - ippiFilterScharrHorizGetBufferSize_8u16s_C1R(roi,&bufSize); - buffer.allocate(bufSize); - - ippiFilterScharrHorizBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step, - (Ipp16s*)dst.data, (int)dst.step, roi, ippBorderRepl, 0, (Ipp8u*)(char*)buffer); - - return true; - } - } - - default: - return false; + case CV_16S: + { + if ((dx == 1) && (dy == 0)) + { + if (0 > ippiFilterScharrVertGetBufferSize_8u16s_C1R(roi,&bufSize)) + return false; + buffer.allocate(bufSize); + return (0 <= ippiFilterScharrVertBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step, + (Ipp16s*)dst.data, (int)dst.step, roi, ippBorderRepl, 0, (Ipp8u*)(char*)buffer)); + } + if ((dx == 0) && (dy == 1)) + { + if (0 > ippiFilterScharrHorizGetBufferSize_8u16s_C1R(roi,&bufSize)) + 
return false; + buffer.allocate(bufSize); + return (0 <= ippiFilterScharrHorizBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step, + (Ipp16s*)dst.data, (int)dst.step, roi, ippBorderRepl, 0, (Ipp8u*)(char*)buffer)); + } + return false; + } + default: + return false; } - } - - case CV_32F: - { + } + case CV_32F: + { switch(dst.type()) { - case CV_32F: - if((dx == 1) && (dy == 0)) - { - ippiFilterScharrVertGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows),&bufSize); - buffer.allocate(bufSize); + case CV_32F: + { + if ((dx == 1) && (dy == 0)) + { + if (0 > ippiFilterScharrVertGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows),&bufSize)) + return false; + buffer.allocate(bufSize); - ippiFilterScharrVertBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step, - (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), - ippBorderRepl, 0, (Ipp8u*)(char*)buffer); - if(scale != 1) - /* IPP is fast, so MulC produce very little perf degradation */ - ippiMulC_32f_C1IR((Ipp32f)scale, (Ipp32f*)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); + if (0 > ippiFilterScharrVertBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step, + (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), + ippBorderRepl, 0, (Ipp8u*)(char*)buffer)) + { + return false; + } - return true; - } + if (scale != 1) + /* IPP is fast, so MulC produce very little perf degradation.*/ + //ippiMulC_32f_C1IR((Ipp32f)scale, (Ipp32f*)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); + ippiMulC_32f_C1R((Ipp32f*)dst.data, (int)dst.step, (Ipp32f)scale, (Ipp32f*)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); + return true; + } + if ((dx == 0) && (dy == 1)) + { + if (0 > ippiFilterScharrHorizGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows),&bufSize)) + return false; + buffer.allocate(bufSize); - if((dx == 0) && (dy == 1)) - { - ippiFilterScharrHorizGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows),&bufSize); - buffer.allocate(bufSize); + 
if (0 > ippiFilterScharrHorizBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step, + (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), + ippBorderRepl, 0, (Ipp8u*)(char*)buffer)) + return false; - ippiFilterScharrHorizBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step, - (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), - ippBorderRepl, 0, (Ipp8u*)(char*)buffer); - if(scale != 1) - ippiMulC_32f_C1IR((Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); - - return true; - } - - default: - return false; + if (scale != 1) + ippiMulC_32f_C1R((Ipp32f *)dst.data, (int)dst.step, (Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); + return true; + } + } + default: + return false; } - } - - default: - return false; - } + } + default: + return false; + } } static bool IPPDeriv(const Mat& src, Mat& dst, int ddepth, int dx, int dy, int ksize, double scale) { - int bufSize = 0; - cv::AutoBuffer buffer; + int bufSize = 0; + cv::AutoBuffer buffer; + if (ksize == 3 || ksize == 5) + { + if ( ddepth < 0 ) + ddepth = src.depth(); - if(ksize == 3 || ksize == 5) - { - if( ddepth < 0 ) - ddepth = src.depth(); + if (src.type() == CV_8U && dst.type() == CV_16S && scale == 1) + { + if ((dx == 1) && (dy == 0)) + { + if (0 > ippiFilterSobelNegVertGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize)) + return false; + buffer.allocate(bufSize); - if(src.type() == CV_8U && dst.type() == CV_16S && scale == 1) - { - if((dx == 1) && (dy == 0)) - { - ippiFilterSobelNegVertGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize); - buffer.allocate(bufSize); + return (0 <= ippiFilterSobelNegVertBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step, + (Ipp16s*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), + ippBorderRepl, 0, (Ipp8u*)(char*)buffer)); + } - 
ippiFilterSobelNegVertBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step, - (Ipp16s*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), - ippBorderRepl, 0, (Ipp8u*)(char*)buffer); - return true; - } + if ((dx == 0) && (dy == 1)) + { + if (0 > ippiFilterSobelHorizGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize)) + return false; + buffer.allocate(bufSize); - if((dx == 0) && (dy == 1)) - { - ippiFilterSobelHorizGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize); - buffer.allocate(bufSize); + return (0 <= ippiFilterSobelHorizBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step, + (Ipp16s*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), + ippBorderRepl, 0, (Ipp8u*)(char*)buffer)); + } - ippiFilterSobelHorizBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step, - (Ipp16s*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), - ippBorderRepl, 0, (Ipp8u*)(char*)buffer); + if ((dx == 2) && (dy == 0)) + { + if (0 > ippiFilterSobelVertSecondGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize)) + return false; + buffer.allocate(bufSize); - return true; - } + return (0 <= ippiFilterSobelVertSecondBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step, + (Ipp16s*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), + ippBorderRepl, 0, (Ipp8u*)(char*)buffer)); + } - if((dx == 2) && (dy == 0)) - { - ippiFilterSobelVertSecondGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize); - buffer.allocate(bufSize); + if ((dx == 0) && (dy == 2)) + { + if (0 > ippiFilterSobelHorizSecondGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize)) + return false; + buffer.allocate(bufSize); - ippiFilterSobelVertSecondBorder_8u16s_C1R((const 
Ipp8u*)src.data, (int)src.step, - (Ipp16s*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), - ippBorderRepl, 0, (Ipp8u*)(char*)buffer); + return (0 <= ippiFilterSobelHorizSecondBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step, + (Ipp16s*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), + ippBorderRepl, 0, (Ipp8u*)(char*)buffer)); + } + } - return true; - } + if (src.type() == CV_32F && dst.type() == CV_32F) + { + if ((dx == 1) && (dy == 0)) + { + if (0 > ippiFilterSobelNegVertGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), &bufSize)) + return false; + buffer.allocate(bufSize); - if((dx == 0) && (dy == 2)) - { - ippiFilterSobelHorizSecondGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize); - buffer.allocate(bufSize); + if (0 > ippiFilterSobelNegVertBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step, + (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), + ippBorderRepl, 0, (Ipp8u*)(char*)buffer)) + { + return false; + } + if(scale != 1) + ippiMulC_32f_C1R((Ipp32f *)dst.data, (int)dst.step, (Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); + return true; + } - ippiFilterSobelHorizSecondBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step, - (Ipp16s*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), - ippBorderRepl, 0, (Ipp8u*)(char*)buffer); + if ((dx == 0) && (dy == 1)) + { + if (0 > ippiFilterSobelHorizGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize)) + return false; + buffer.allocate(bufSize); - return true; - } - } + if (0 > ippiFilterSobelHorizBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step, + (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), + ippBorderRepl, 0, 
(Ipp8u*)(char*)buffer)) + { + return false; + } + if(scale != 1) + ippiMulC_32f_C1R((Ipp32f *)dst.data, (int)dst.step, (Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); + return true; + } - if(src.type() == CV_32F && dst.type() == CV_32F) - { - if((dx == 1) && (dy == 0)) - { - ippiFilterSobelNegVertGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), &bufSize); - buffer.allocate(bufSize); + if((dx == 2) && (dy == 0)) + { + if (0 > ippiFilterSobelVertSecondGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize)) + return false; + buffer.allocate(bufSize); - ippiFilterSobelNegVertBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step, - (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), - ippBorderRepl, 0, (Ipp8u*)(char*)buffer); - if(scale != 1) - ippiMulC_32f_C1IR((Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); + if (0 > ippiFilterSobelVertSecondBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step, + (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), + ippBorderRepl, 0, (Ipp8u*)(char*)buffer)) + { + return false; + } + if(scale != 1) + ippiMulC_32f_C1R((Ipp32f *)dst.data, (int)dst.step, (Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); + return true; + } - return true; - } + if((dx == 0) && (dy == 2)) + { + if (0 > ippiFilterSobelHorizSecondGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize)) + return false; + buffer.allocate(bufSize); - if((dx == 0) && (dy == 1)) - { - ippiFilterSobelHorizGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize); - buffer.allocate(bufSize); + if (0 > ippiFilterSobelHorizSecondBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step, + (Ipp32f*)dst.data, 
(int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), + ippBorderRepl, 0, (Ipp8u*)(char*)buffer)) + { + return false; + } - ippiFilterSobelHorizBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step, - (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), - ippBorderRepl, 0, (Ipp8u*)(char*)buffer); - if(scale != 1) - ippiMulC_32f_C1IR((Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); + if(scale != 1) + ippiMulC_32f_C1R((Ipp32f *)dst.data, (int)dst.step, (Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); + return true; + } + } + } - return true; - } - - if((dx == 2) && (dy == 0)) - { - ippiFilterSobelVertSecondGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize); - buffer.allocate(bufSize); - - ippiFilterSobelVertSecondBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step, - (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), - ippBorderRepl, 0, (Ipp8u*)(char*)buffer); - if(scale != 1) - ippiMulC_32f_C1IR((Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); - - return true; - } - - if((dx == 0) && (dy == 2)) - { - ippiFilterSobelHorizSecondGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize); - buffer.allocate(bufSize); - - ippiFilterSobelHorizSecondBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step, - (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), - ippBorderRepl, 0, (Ipp8u*)(char*)buffer); - if(scale != 1) - ippiMulC_32f_C1IR((Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); - - return true; - } - } - } - - if(ksize <= 0) - return IPPDerivScharr(src, dst, ddepth, dx, dy, scale); - - return false; + if(ksize <= 0) + return IPPDerivScharr(src, dst, 
ddepth, dx, dy, scale); + return false; } } @@ -436,7 +444,7 @@ void cv::Sobel( InputArray _src, OutputArray _dst, int ddepth, int dx, int dy, if(dx < 3 && dy < 3 && cn == 1 && borderType == BORDER_REPLICATE) { Mat src = _src.getMat(), dst = _dst.getMat(); - if(IPPDeriv(src, dst, ddepth, dx, dy, ksize,scale)) + if (IPPDeriv(src, dst, ddepth, dx, dy, ksize,scale)) return; } #endif diff --git a/modules/imgproc/src/filter.cpp b/modules/imgproc/src/filter.cpp index 2bc6b8a70..d23f87ef4 100644 --- a/modules/imgproc/src/filter.cpp +++ b/modules/imgproc/src/filter.cpp @@ -1420,36 +1420,16 @@ struct RowVec_32f int operator()(const uchar* _src, uchar* _dst, int width, int cn) const { +#ifdef USE_IPP_SEP_FILTERS + int ret = ippiOperator(_src, _dst, width, cn); + if (ret > 0) + return ret; +#endif int _ksize = kernel.rows + kernel.cols - 1; const float* src0 = (const float*)_src; float* dst = (float*)_dst; const float* _kx = (const float*)kernel.data; -#ifdef USE_IPP_SEP_FILTERS - IppiSize roisz = { width, 1 }; - if( (cn == 1 || cn == 3) && width >= _ksize*8 ) - { - if( bufsz < 0 ) - { - if( (cn == 1 && ippiFilterRowBorderPipelineGetBufferSize_32f_C1R(roisz, _ksize, &bufsz) < 0) || - (cn == 3 && ippiFilterRowBorderPipelineGetBufferSize_32f_C3R(roisz, _ksize, &bufsz) < 0)) - return 0; - } - AutoBuffer buf(bufsz + 64); - uchar* bufptr = alignPtr((uchar*)buf, 32); - int step = (int)(width*sizeof(dst[0])*cn); - float borderValue[] = {0.f, 0.f, 0.f}; - // here is the trick. IPP needs border type and extrapolates the row. We did it already. - // So we pass anchor=0 and ignore the right tail of results since they are incorrect there. 
- if( (cn == 1 && ippiFilterRowBorderPipeline_32f_C1R(src0, step, &dst, roisz, _kx, _ksize, 0, - ippBorderRepl, borderValue[0], bufptr) < 0) || - (cn == 3 && ippiFilterRowBorderPipeline_32f_C3R(src0, step, &dst, roisz, _kx, _ksize, 0, - ippBorderRepl, borderValue, bufptr) < 0)) - return 0; - return width - _ksize + 1; - } -#endif - if( !haveSSE ) return 0; @@ -1479,7 +1459,38 @@ struct RowVec_32f Mat kernel; bool haveSSE; #ifdef USE_IPP_SEP_FILTERS +private: mutable int bufsz; + int ippiOperator(const uchar* _src, uchar* _dst, int width, int cn) const + { + int _ksize = kernel.rows + kernel.cols - 1; + if ((1 != cn && 3 != cn) || width < _ksize*8) + return 0; + + const float* src = (const float*)_src; + float* dst = (float*)_dst; + const float* _kx = (const float*)kernel.data; + + IppiSize roisz = { width, 1 }; + if( bufsz < 0 ) + { + if( (cn == 1 && ippiFilterRowBorderPipelineGetBufferSize_32f_C1R(roisz, _ksize, &bufsz) < 0) || + (cn == 3 && ippiFilterRowBorderPipelineGetBufferSize_32f_C3R(roisz, _ksize, &bufsz) < 0)) + return 0; + } + AutoBuffer buf(bufsz + 64); + uchar* bufptr = alignPtr((uchar*)buf, 32); + int step = (int)(width*sizeof(dst[0])*cn); + float borderValue[] = {0.f, 0.f, 0.f}; + // here is the trick. IPP needs border type and extrapolates the row. We did it already. + // So we pass anchor=0 and ignore the right tail of results since they are incorrect there. 
+ if( (cn == 1 && ippiFilterRowBorderPipeline_32f_C1R(src, step, &dst, roisz, _kx, _ksize, 0, + ippBorderRepl, borderValue[0], bufptr) < 0) || + (cn == 3 && ippiFilterRowBorderPipeline_32f_C3R(src, step, &dst, roisz, _kx, _ksize, 0, + ippBorderRepl, borderValue, bufptr) < 0)) + return 0; + return width - _ksize + 1; + } #endif }; diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp index 9d3eabaad..9e5540a7b 100644 --- a/modules/imgproc/src/imgwarp.cpp +++ b/modules/imgproc/src/imgwarp.cpp @@ -1971,7 +1971,7 @@ public: CHECK_IPP_STATUS(getBufferSizeFunc(pSpec, dstSize, cn, &bufsize)); CHECK_IPP_STATUS(getSrcOffsetFunc(pSpec, dstOffset, &srcOffset)); - Ipp8u* pSrc = (Ipp8u*)src.data + (int)src.step[0] * srcOffset.y + srcOffset.x * cn * itemSize; + const Ipp8u* pSrc = (const Ipp8u*)src.data + (int)src.step[0] * srcOffset.y + srcOffset.x * cn * itemSize; Ipp8u* pDst = (Ipp8u*)dst.data + (int)dst.step[0] * dstOffset.y + dstOffset.x * cn * itemSize; AutoBuffer buf(bufsize + 64); @@ -1980,7 +1980,6 @@ public: if( func( pSrc, (int)src.step[0], pDst, (int)dst.step[0], dstOffset, dstSize, ippBorderRepl, 0, pSpec, bufptr ) < 0 ) *ok = false; } - private: const Mat & src; Mat & dst; @@ -4025,25 +4024,25 @@ public: *ok = true; } - virtual void operator() (const Range& range) const - { - IppiSize srcsize = { src.cols, src.rows }; - IppiRect srcroi = { 0, 0, src.cols, src.rows }; - IppiRect dstroi = { 0, range.start, dst.cols, range.end - range.start }; - int cnn = src.channels(); - if( borderType == BORDER_CONSTANT ) - { - IppiSize setSize = { dst.cols, range.end - range.start }; - void *dataPointer = dst.data + dst.step[0] * range.start; - if( !IPPSet( borderValue, dataPointer, (int)dst.step[0], setSize, cnn, src.depth() ) ) - { - *ok = false; - return; - } - } - if( func( src.data, srcsize, (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, coeffs, mode ) < 0) ////Aug 2013: problem in IPP 7.1, 8.0 : sometimes function return ippStsCoeffErr 
- *ok = false; - } + virtual void operator() (const Range& range) const + { + IppiSize srcsize = { src.cols, src.rows }; + IppiRect srcroi = { 0, 0, src.cols, src.rows }; + IppiRect dstroi = { 0, range.start, dst.cols, range.end - range.start }; + int cnn = src.channels(); + if( borderType == BORDER_CONSTANT ) + { + IppiSize setSize = { dst.cols, range.end - range.start }; + void *dataPointer = dst.data + dst.step[0] * range.start; + if( !IPPSet( borderValue, dataPointer, (int)dst.step[0], setSize, cnn, src.depth() ) ) + { + *ok = false; + return; + } + } + if( func( src.data, srcsize, (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, coeffs, mode ) < 0) ////Aug 2013: problem in IPP 7.1, 8.0 : sometimes function return ippStsCoeffErr + *ok = false; + } private: Mat &src; Mat &dst; @@ -4368,26 +4367,26 @@ public: *ok = true; } - virtual void operator() (const Range& range) const - { - IppiSize srcsize = {src.cols, src.rows}; - IppiRect srcroi = {0, 0, src.cols, src.rows}; - IppiRect dstroi = {0, range.start, dst.cols, range.end - range.start}; - int cnn = src.channels(); + virtual void operator() (const Range& range) const + { + IppiSize srcsize = {src.cols, src.rows}; + IppiRect srcroi = {0, 0, src.cols, src.rows}; + IppiRect dstroi = {0, range.start, dst.cols, range.end - range.start}; + int cnn = src.channels(); - if( borderType == BORDER_CONSTANT ) - { - IppiSize setSize = {dst.cols, range.end - range.start}; - void *dataPointer = dst.data + dst.step[0] * range.start; - if( !IPPSet( borderValue, dataPointer, (int)dst.step[0], setSize, cnn, src.depth() ) ) - { - *ok = false; - return; - } - } - if( func(src.data, srcsize, (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, coeffs, mode) < 0) - *ok = false; - } + if( borderType == BORDER_CONSTANT ) + { + IppiSize setSize = {dst.cols, range.end - range.start}; + void *dataPointer = dst.data + dst.step[0] * range.start; + if( !IPPSet( borderValue, dataPointer, (int)dst.step[0], setSize, cnn, 
src.depth() ) ) + { + *ok = false; + return; + } + } + if( func(src.data, srcsize, (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, coeffs, mode) < 0) + *ok = false; + } private: Mat &src; Mat &dst; diff --git a/modules/imgproc/src/morph.cpp b/modules/imgproc/src/morph.cpp index 1dd0a252e..b3433549d 100644 --- a/modules/imgproc/src/morph.cpp +++ b/modules/imgproc/src/morph.cpp @@ -1136,80 +1136,128 @@ private: Scalar borderValue; }; -#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) +#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 8) && (IPP_VERSION_MINOR >= 1) static bool IPPMorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kernel, const Size& ksize, const Point &anchor, bool rectKernel) { int type = src.type(); const Mat* _src = &src; Mat temp; - if( src.data == dst.data ) + if (src.data == dst.data) { src.copyTo(temp); _src = &temp; } - //DEPRECATED. Allocates and initializes morphology state structure for erosion or dilation operation. - typedef IppStatus (CV_STDCALL* ippiMorphologyInitAllocFunc)(int, const void*, IppiSize, IppiPoint, IppiMorphState **); - typedef IppStatus (CV_STDCALL* ippiMorphologyBorderReplicateFunc)(const void*, int, void *, int, - IppiSize, IppiBorderType, IppiMorphState *); - typedef IppStatus (CV_STDCALL* ippiFilterMinMaxGetBufferSizeFunc)(int, IppiSize, int*); - typedef IppStatus (CV_STDCALL* ippiFilterMinMaxBorderReplicateFunc)(const void*, int, void*, int, - IppiSize, IppiSize, IppiPoint, void*); - - ippiMorphologyInitAllocFunc initAllocFunc = 0; - ippiMorphologyBorderReplicateFunc morphFunc = 0; - ippiFilterMinMaxGetBufferSizeFunc getBufSizeFunc = 0; - ippiFilterMinMaxBorderReplicateFunc morphRectFunc = 0; - - #define IPP_MORPH_CASE(type, flavor) \ - case type: \ - initAllocFunc = (ippiMorphologyInitAllocFunc)ippiMorphologyInitAlloc_##flavor; \ - morphFunc = op == MORPH_ERODE ? 
(ippiMorphologyBorderReplicateFunc)ippiErodeBorderReplicate_##flavor : \ - (ippiMorphologyBorderReplicateFunc)ippiDilateBorderReplicate_##flavor; \ - getBufSizeFunc = (ippiFilterMinMaxGetBufferSizeFunc)ippiFilterMinGetBufferSize_##flavor; \ - morphRectFunc = op == MORPH_ERODE ? (ippiFilterMinMaxBorderReplicateFunc)ippiFilterMinBorderReplicate_##flavor : \ - (ippiFilterMinMaxBorderReplicateFunc)ippiFilterMaxBorderReplicate_##flavor; \ - break - - switch( type ) - { - IPP_MORPH_CASE(CV_8UC1, 8u_C1R); - IPP_MORPH_CASE(CV_8UC3, 8u_C3R); - IPP_MORPH_CASE(CV_8UC4, 8u_C4R); - IPP_MORPH_CASE(CV_32FC1, 32f_C1R); - IPP_MORPH_CASE(CV_32FC3, 32f_C3R); - IPP_MORPH_CASE(CV_32FC4, 32f_C4R); - default: - return false; - } - #undef IPP_MORPH_CASE IppiSize roiSize = {src.cols, src.rows}; IppiSize kernelSize = {ksize.width, ksize.height}; - IppiPoint point = {anchor.x, anchor.y}; - if( !rectKernel && morphFunc && initAllocFunc ) + if (!rectKernel) { - IppiMorphState* pState; - if( initAllocFunc( roiSize.width, kernel.data, kernelSize, point, &pState ) < 0 ) +#if 1 + if (((kernel.cols - 1) / 2 != anchor.x) || ((kernel.rows - 1) / 2 != anchor.y)) return false; - bool is_ok = morphFunc( _src->data, (int)_src->step[0], - dst.data, (int)dst.step[0], - roiSize, ippBorderRepl, pState ) >= 0; - ippiMorphologyFree(pState); - return is_ok; + #define IPP_MORPH_CASE(cvtype, flavor, data_type) \ + case cvtype: \ + {\ + int specSize = 0, bufferSize = 0;\ + if (0 > ippiMorphologyBorderGetSize_##flavor(roiSize.width, kernelSize, &specSize, &bufferSize))\ + return false;\ + IppiMorphState *pSpec = (IppiMorphState*)ippMalloc(specSize);\ + Ipp8u *pBuffer = (Ipp8u*)ippMalloc(bufferSize);\ + if (0 > ippiMorphologyBorderInit_##flavor(roiSize.width, kernel.data, kernelSize, pSpec, pBuffer))\ + {\ + ippFree(pBuffer);\ + ippFree(pSpec);\ + return false;\ + }\ + bool ok = false;\ + if (op == MORPH_ERODE)\ + ok = (0 <= ippiErodeBorder_##flavor((Ipp##data_type *)_src->data, (int)_src->step[0], (Ipp##data_type 
*)dst.data, (int)dst.step[0],\ + roiSize, ippBorderRepl, 0, pSpec, pBuffer));\ + else\ + ok = (0 <= ippiDilateBorder_##flavor((Ipp##data_type *)_src->data, (int)_src->step[0], (Ipp##data_type *)dst.data, (int)dst.step[0],\ + roiSize, ippBorderRepl, 0, pSpec, pBuffer));\ + ippFree(pBuffer);\ + ippFree(pSpec);\ + return ok;\ + }\ + break; +#else + IppiPoint point = {anchor.x, anchor.y}; + // this is case, which can be used with the anchor not in center of the kernel, but + // ippiMorphologyBorderGetSize_, ippiErodeBorderReplicate_ and ippiDilateBorderReplicate_ are deprecated. + #define IPP_MORPH_CASE(cvtype, flavor, data_type) \ + case cvtype: \ + {\ + int specSize = 0;\ + int bufferSize = 0;\ + if (0 > ippiMorphologyGetSize_##flavor( roiSize.width, kernel.data kernelSize, &specSize))\ + return false;\ + bool ok = false;\ + IppiMorphState* pState = (IppiMorphState*)ippMalloc(specSize);\ + if (ippiMorphologyInit_##flavor(roiSize.width, kernel.data, kernelSize, point, pState) >= 0)\ + {\ + if (op == MORPH_ERODE)\ + ok = ippiErodeBorderReplicate_##flavor((Ipp##data_type *)_src->data, (int)_src->step[0],\ + (Ipp##data_type *)dst.data, (int)dst.step[0],\ + roiSize, ippBorderRepl, pState ) >= 0;\ + else\ + ok = ippiDilateBorderReplicate_##flavor((Ipp##data_type *)_src->data, (int)_src->step[0],\ + (Ipp##data_type *)dst.data, (int)dst.step[0],\ + roiSize, ippBorderRepl, pState ) >= 0;\ + }\ + ippFree(pState);\ + return ok;\ + }\ + break; +#endif + switch (type) + { + IPP_MORPH_CASE(CV_8UC1, 8u_C1R, 8u); + IPP_MORPH_CASE(CV_8UC3, 8u_C3R, 8u); + IPP_MORPH_CASE(CV_8UC4, 8u_C4R, 8u); + IPP_MORPH_CASE(CV_32FC1, 32f_C1R, 32f); + IPP_MORPH_CASE(CV_32FC3, 32f_C3R, 32f); + IPP_MORPH_CASE(CV_32FC4, 32f_C4R, 32f); + default: + return false; + } + + #undef IPP_MORPH_CASE } - else if( rectKernel && morphRectFunc && getBufSizeFunc ) + else { - int bufSize = 0; - if( getBufSizeFunc( src.cols, kernelSize, &bufSize) < 0 ) + IppiPoint point = {anchor.x, anchor.y}; + + #define 
IPP_MORPH_CASE(cvtype, flavor, data_type) \ + case cvtype: \ + {\ + int bufSize = 0;\ + if (0 > ippiFilterMinGetBufferSize_##flavor(src.cols, kernelSize, &bufSize))\ + return false;\ + AutoBuffer buf(bufSize + 64);\ + uchar* buffer = alignPtr((uchar*)buf, 32);\ + if (op == MORPH_ERODE)\ + return (0 <= ippiFilterMinBorderReplicate_##flavor((Ipp##data_type *)_src->data, (int)_src->step[0], (Ipp##data_type *)dst.data, (int)dst.step[0], roiSize, kernelSize, point, buffer));\ + return (0 <= ippiFilterMaxBorderReplicate_##flavor((Ipp##data_type *)_src->data, (int)_src->step[0], (Ipp##data_type *)dst.data, (int)dst.step[0], roiSize, kernelSize, point, buffer));\ + }\ + break; + + switch (type) + { + IPP_MORPH_CASE(CV_8UC1, 8u_C1R, 8u); + IPP_MORPH_CASE(CV_8UC3, 8u_C3R, 8u); + IPP_MORPH_CASE(CV_8UC4, 8u_C4R, 8u); + IPP_MORPH_CASE(CV_32FC1, 32f_C1R, 32f); + IPP_MORPH_CASE(CV_32FC3, 32f_C3R, 32f); + IPP_MORPH_CASE(CV_32FC4, 32f_C4R, 32f); + default: return false; - AutoBuffer buf(bufSize + 64); - uchar* buffer = alignPtr((uchar*)buf, 32); - return morphRectFunc(_src->data, (int)_src->step[0], dst.data, (int)dst.step[0], - roiSize, kernelSize, point, buffer) >= 0; + } + + #undef IPP_MORPH_CASE } - return false; } static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst, @@ -1411,7 +1459,7 @@ static void morphOp( int op, InputArray _src, OutputArray _dst, Size ksize = kernel.data ? 
kernel.size() : Size(3,3); anchor = normalizeAnchor(anchor, ksize); -#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) +#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 8) && (IPP_VERSION_MINOR >= 1) if( IPPMorphOp(op, _src, _dst, kernel, anchor, iterations, borderType, borderValue) ) return; #endif diff --git a/modules/imgproc/src/smooth.cpp b/modules/imgproc/src/smooth.cpp index 6a18af5c2..7499e2468 100644 --- a/modules/imgproc/src/smooth.cpp +++ b/modules/imgproc/src/smooth.cpp @@ -1109,20 +1109,27 @@ void cv::GaussianBlur( InputArray _src, OutputArray _dst, Size ksize, return; #endif -#if defined HAVE_IPP && (IPP_VERSION_MAJOR >= 7) +#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 8) && (IPP_VERSION_MINOR >= 1) if( type == CV_32FC1 && sigma1 == sigma2 && ksize.width == ksize.height && sigma1 != 0.0 ) { Mat src = _src.getMat(), dst = _dst.getMat(); IppiSize roi = { src.cols, src.rows }; - int bufSize = 0; - ippiFilterGaussGetBufferSize_32f_C1R(roi, ksize.width, &bufSize); - AutoBuffer buf(bufSize+128); - if( ippiFilterGaussBorder_32f_C1R((const Ipp32f *)src.data, (int)src.step, - (Ipp32f *)dst.data, (int)dst.step, - roi, ksize.width, (Ipp32f)sigma1, - (IppiBorderType)borderType, 0.0, - alignPtr(&buf[0],32)) >= 0 ) - return; + int specSize = 0, bufferSize = 0; + if (0 <= ippiFilterGaussianGetBufferSize(roi, (Ipp32u)ksize.width, ipp32f, 1, &specSize, &bufferSize)) + { + IppFilterGaussianSpec *pSpec = (IppFilterGaussianSpec*)ippMalloc(specSize); + Ipp8u *pBuffer = (Ipp8u*)ippMalloc(bufferSize); + if (0 <= ippiFilterGaussianInit(roi, (Ipp32u)ksize.width, (Ipp32f)sigma1, (IppiBorderType)borderType, ipp32f, 1, pSpec, pBuffer)) + { + IppStatus sts = ippiFilterGaussianBorder_32f_C1R( (const Ipp32f *)src.data, (int)src.step, + (Ipp32f *)dst.data, (int)dst.step, + roi, 0.0, pSpec, pBuffer); + ippFree(pBuffer); + ippFree(pSpec); + if (0 <= sts) + return; + } + } } #endif @@ -2180,11 +2187,19 @@ public: IppiSize kernel = {d, d}; IppiSize roi={dst.cols, range.end - 
range.start}; int bufsize=0; - ippiFilterBilateralGetBufSize_8u_C1R( ippiFilterBilateralGauss, roi, kernel, &bufsize); + if (0 > ippiFilterBilateralGetBufSize_8u_C1R( ippiFilterBilateralGauss, roi, kernel, &bufsize)) + { + *ok = false; + return; + } AutoBuffer buf(bufsize); IppiFilterBilateralSpec *pSpec = (IppiFilterBilateralSpec *)alignPtr(&buf[0], 32); - ippiFilterBilateralInit_8u_C1R( ippiFilterBilateralGauss, kernel, (Ipp32f)sigma_color, (Ipp32f)sigma_space, 1, pSpec ); - if( ippiFilterBilateral_8u_C1R( src.ptr(range.start) + radius * ((int)src.step[0] + 1), (int)src.step[0], dst.ptr(range.start), (int)dst.step[0], roi, kernel, pSpec ) < 0) + if (0 > ippiFilterBilateralInit_8u_C1R( ippiFilterBilateralGauss, kernel, (Ipp32f)sigma_color, (Ipp32f)sigma_space, 1, pSpec )) + { + *ok = false; + return; + } + if (0 > ippiFilterBilateral_8u_C1R( src.ptr(range.start) + radius * ((int)src.step[0] + 1), (int)src.step[0], dst.ptr(range.start), (int)dst.step[0], roi, kernel, pSpec )) *ok = false; } private: diff --git a/modules/imgproc/src/sumpixels.cpp b/modules/imgproc/src/sumpixels.cpp index 4e18f119f..c32813fab 100644 --- a/modules/imgproc/src/sumpixels.cpp +++ b/modules/imgproc/src/sumpixels.cpp @@ -365,30 +365,32 @@ void cv::integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, Output #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) if( ( depth == CV_8U ) && ( sdepth == CV_32F || sdepth == CV_32S ) && ( !_tilted.needed() ) && ( !_sqsum.needed() || sqdepth == CV_64F ) && ( cn == 1 ) ) { + IppStatus status = ippStsErr; IppiSize srcRoiSize = ippiSize( src.cols, src.rows ); if( sdepth == CV_32F ) { if( _sqsum.needed() ) { - ippiSqrIntegral_8u32f64f_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32f*)sum.data, (int)sum.step, (Ipp64f*)sqsum.data, (int)sqsum.step, srcRoiSize, 0, 0 ); + status = ippiSqrIntegral_8u32f64f_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32f*)sum.data, (int)sum.step, (Ipp64f*)sqsum.data, (int)sqsum.step, srcRoiSize, 0, 0 ); } else 
{ - ippiIntegral_8u32f_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32f*)sum.data, (int)sum.step, srcRoiSize, 0 ); + status = ippiIntegral_8u32f_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32f*)sum.data, (int)sum.step, srcRoiSize, 0 ); } } else if( sdepth == CV_32S ) { if( _sqsum.needed() ) { - ippiSqrIntegral_8u32s64f_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32s*)sum.data, (int)sum.step, (Ipp64f*)sqsum.data, (int)sqsum.step, srcRoiSize, 0, 0 ); + status = ippiSqrIntegral_8u32s64f_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32s*)sum.data, (int)sum.step, (Ipp64f*)sqsum.data, (int)sqsum.step, srcRoiSize, 0, 0 ); } else { - ippiIntegral_8u32s_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32s*)sum.data, (int)sum.step, srcRoiSize, 0 ); + status = ippiIntegral_8u32s_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32s*)sum.data, (int)sum.step, srcRoiSize, 0 ); } } - return; + if (0 <= status) + return; } #endif diff --git a/modules/objdetect/src/haar.cpp b/modules/objdetect/src/haar.cpp index 2f864797f..4b2d2b0d5 100644 --- a/modules/objdetect/src/haar.cpp +++ b/modules/objdetect/src/haar.cpp @@ -1318,9 +1318,9 @@ public: if( cascade->hid_cascade->ipp_stages ) { IppiRect iequRect = {equRect.x, equRect.y, equRect.width, equRect.height}; - ippiRectStdDev_32f_C1R(sum1.ptr(y1), sum1.step, - sqsum1.ptr(y1), sqsum1.step, - norm1->ptr(y1), norm1->step, + ippiRectStdDev_32f_C1R(sum1.ptr(y1), (int)sum1.step, + sqsum1.ptr(y1), (int)sqsum1.step, + norm1->ptr(y1), (int)norm1->step, ippiSize(ssz.width, ssz.height), iequRect ); int positive = (ssz.width/ystep)*((ssz.height + ystep-1)/ystep); @@ -1341,9 +1341,9 @@ public: for( int j = 0; j < cascade->count; j++ ) { if( ippiApplyHaarClassifier_32f_C1R( - sum1.ptr(y1), sum1.step, - norm1->ptr(y1), norm1->step, - mask1->ptr(y1), mask1->step, + sum1.ptr(y1), (int)sum1.step, + norm1->ptr(y1), (int)norm1->step, + mask1->ptr(y1), (int)mask1->step, ippiSize(ssz.width, ssz.height), &positive, 
cascade->hid_cascade->stage_classifier[j].threshold, (IppiHaarClassifier_32f*)cascade->hid_cascade->ipp_stages[j]) < 0 )