Implementation detector and selector for IPP and OpenCL;

IPP can be switched on and off at runtime;

Optional implementation collector was added (switched off by default in CMake). It gathers data on the implementation used in functions and reports this info through the performance TS;

TS modifications for implementation control;
This commit is contained in:
Pavel Vlasov
2014-10-03 15:17:28 +04:00
parent 83ef276697
commit 45958eaabc
49 changed files with 4279 additions and 2799 deletions

View File

@@ -2821,11 +2821,17 @@ static double dotProd_8u(const uchar* src1, const uchar* src2, int len)
{
double r = 0;
#if ARITHM_USE_IPP && 0
if (0 <= ippiDotProd_8u64f_C1R(src1, (int)(len*sizeof(src1[0])),
src2, (int)(len*sizeof(src2[0])),
ippiSize(len, 1), &r))
return r;
setIppErrorStatus();
CV_IPP_CHECK()
{
if (0 <= ippiDotProd_8u64f_C1R(src1, (int)(len*sizeof(src1[0])),
src2, (int)(len*sizeof(src2[0])),
ippiSize(len, 1), &r))
{
CV_IMPL_ADD(CV_IMPL_IPP);
return r;
}
setIppErrorStatus();
}
#endif
int i = 0;
@@ -2968,10 +2974,16 @@ static double dotProd_8s(const schar* src1, const schar* src2, int len)
// Dot product of two ushort arrays of `len` elements, returned as a double.
// When ARITHM_USE_IPP == 1, ippiDotProd_16u64f_C1R is tried first, called with
// ippiSize(len, 1) and a step of len*sizeof(element) for each source; a
// non-negative status returns the IPP result. In every other case the
// function returns dotProd_(src1, src2, len).
static double dotProd_16u(const ushort* src1, const ushort* src2, int len)
{
#if (ARITHM_USE_IPP == 1)
// NOTE(review): the next four lines call IPP outside CV_IPP_CHECK() and repeat
// the call made inside the guarded block below. They look like the pre-change
// side of a diff left next to its replacement — confirm which one is intended.
double r = 0;
if (0 <= ippiDotProd_16u64f_C1R(src1, (int)(len*sizeof(src1[0])), src2, (int)(len*sizeof(src2[0])), ippiSize(len, 1), &r))
return r;
setIppErrorStatus();
// CV_IPP_CHECK() is defined elsewhere; presumably it gates the block on the
// runtime IPP on/off switch — verify against its definition.
CV_IPP_CHECK()
{
// This `r` shadows the outer declaration above.
double r = 0;
if (0 <= ippiDotProd_16u64f_C1R(src1, (int)(len*sizeof(src1[0])), src2, (int)(len*sizeof(src2[0])), ippiSize(len, 1), &r))
{
// Presumably records that the IPP implementation was used — macro is
// defined elsewhere; confirm.
CV_IMPL_ADD(CV_IMPL_IPP);
return r;
}
// IPP returned a negative status: call setIppErrorStatus() and fall through.
setIppErrorStatus();
}
#endif
// Non-IPP path (also reached after an IPP failure).
return dotProd_(src1, src2, len);
}
@@ -2979,10 +2991,16 @@ static double dotProd_16u(const ushort* src1, const ushort* src2, int len)
// Dot product of two short arrays of `len` elements, returned as a double.
// When ARITHM_USE_IPP == 1, ippiDotProd_16s64f_C1R is tried first, called with
// ippiSize(len, 1) and a step of len*sizeof(element) for each source; a
// non-negative status returns the IPP result. In every other case the
// function returns dotProd_(src1, src2, len).
static double dotProd_16s(const short* src1, const short* src2, int len)
{
#if (ARITHM_USE_IPP == 1)
// NOTE(review): the next four lines call IPP outside CV_IPP_CHECK() and repeat
// the call made inside the guarded block below. They look like the pre-change
// side of a diff left next to its replacement — confirm which one is intended.
double r = 0;
if (0 <= ippiDotProd_16s64f_C1R(src1, (int)(len*sizeof(src1[0])), src2, (int)(len*sizeof(src2[0])), ippiSize(len, 1), &r))
return r;
setIppErrorStatus();
// CV_IPP_CHECK() is defined elsewhere; presumably it gates the block on the
// runtime IPP on/off switch — verify against its definition.
CV_IPP_CHECK()
{
// This `r` shadows the outer declaration above.
double r = 0;
if (0 <= ippiDotProd_16s64f_C1R(src1, (int)(len*sizeof(src1[0])), src2, (int)(len*sizeof(src2[0])), ippiSize(len, 1), &r))
{
// Presumably records that the IPP implementation was used — macro is
// defined elsewhere; confirm.
CV_IMPL_ADD(CV_IMPL_IPP);
return r;
}
// IPP returned a negative status: call setIppErrorStatus() and fall through.
setIppErrorStatus();
}
#endif
// Non-IPP path (also reached after an IPP failure).
return dotProd_(src1, src2, len);
}
@@ -2990,10 +3008,16 @@ static double dotProd_16s(const short* src1, const short* src2, int len)
// Dot product of two int arrays of `len` elements, returned as a double.
// When ARITHM_USE_IPP == 1, ippiDotProd_32s64f_C1R is tried first, called with
// ippiSize(len, 1) and a step of len*sizeof(element) for each source; a
// non-negative status returns the IPP result. In every other case the
// function returns dotProd_(src1, src2, len).
static double dotProd_32s(const int* src1, const int* src2, int len)
{
#if (ARITHM_USE_IPP == 1)
// NOTE(review): the next four lines call IPP outside CV_IPP_CHECK() and repeat
// the call made inside the guarded block below. They look like the pre-change
// side of a diff left next to its replacement — confirm which one is intended.
double r = 0;
if (0 <= ippiDotProd_32s64f_C1R(src1, (int)(len*sizeof(src1[0])), src2, (int)(len*sizeof(src2[0])), ippiSize(len, 1), &r))
return r;
setIppErrorStatus();
// CV_IPP_CHECK() is defined elsewhere; presumably it gates the block on the
// runtime IPP on/off switch — verify against its definition.
CV_IPP_CHECK()
{
// This `r` shadows the outer declaration above.
double r = 0;
if (0 <= ippiDotProd_32s64f_C1R(src1, (int)(len*sizeof(src1[0])), src2, (int)(len*sizeof(src2[0])), ippiSize(len, 1), &r))
{
// Presumably records that the IPP implementation was used — macro is
// defined elsewhere; confirm.
CV_IMPL_ADD(CV_IMPL_IPP);
return r;
}
// IPP returned a negative status: call setIppErrorStatus() and fall through.
setIppErrorStatus();
}
#endif
// Non-IPP path (also reached after an IPP failure).
return dotProd_(src1, src2, len);
}
@@ -3004,9 +3028,15 @@ static double dotProd_32f(const float* src1, const float* src2, int len)
int i = 0;
#if (ARITHM_USE_IPP == 1)
if (0 <= ippsDotProd_32f64f(src1, src2, len, &r))
return r;
setIppErrorStatus();
CV_IPP_CHECK()
{
if (0 <= ippsDotProd_32f64f(src1, src2, len, &r))
{
CV_IMPL_ADD(CV_IMPL_IPP);
return r;
}
setIppErrorStatus();
}
#elif CV_NEON
int len0 = len & -4, blockSize0 = (1 << 13), blockSize;
float32x4_t v_zero = vdupq_n_f32(0.0f);
@@ -3035,10 +3065,16 @@ static double dotProd_32f(const float* src1, const float* src2, int len)
// Dot product of two double arrays of `len` elements.
// When ARITHM_USE_IPP == 1, ippsDotProd_64f(src1, src2, len, &r) is tried
// first; a non-negative status returns the IPP result. In every other case
// the function returns dotProd_(src1, src2, len).
static double dotProd_64f(const double* src1, const double* src2, int len)
{
#if (ARITHM_USE_IPP == 1)
// NOTE(review): the next four lines call IPP outside CV_IPP_CHECK() and repeat
// the call made inside the guarded block below. They look like the pre-change
// side of a diff left next to its replacement — confirm which one is intended.
double r = 0;
if (0 <= ippsDotProd_64f(src1, src2, len, &r))
return r;
setIppErrorStatus();
// CV_IPP_CHECK() is defined elsewhere; presumably it gates the block on the
// runtime IPP on/off switch — verify against its definition.
CV_IPP_CHECK()
{
// This `r` shadows the outer declaration above.
double r = 0;
if (0 <= ippsDotProd_64f(src1, src2, len, &r))
{
// Presumably records that the IPP implementation was used — macro is
// defined elsewhere; confirm.
CV_IMPL_ADD(CV_IMPL_IPP);
return r;
}
// IPP returned a negative status: call setIppErrorStatus() and fall through.
setIppErrorStatus();
}
#endif
// Non-IPP path (also reached after an IPP failure).
return dotProd_(src1, src2, len);
}