Implementation detector and selector for IPP and OpenCL;
IPP can be switched on and off on runtime; Optional implementation collector was added (switched off by default in CMake). Gathers data of implementation used in functions and report this info through performance TS; TS modifications for implementations control;
This commit is contained in:
@@ -2821,11 +2821,17 @@ static double dotProd_8u(const uchar* src1, const uchar* src2, int len)
|
||||
{
|
||||
double r = 0;
|
||||
#if ARITHM_USE_IPP && 0
|
||||
if (0 <= ippiDotProd_8u64f_C1R(src1, (int)(len*sizeof(src1[0])),
|
||||
src2, (int)(len*sizeof(src2[0])),
|
||||
ippiSize(len, 1), &r))
|
||||
return r;
|
||||
setIppErrorStatus();
|
||||
CV_IPP_CHECK()
|
||||
{
|
||||
if (0 <= ippiDotProd_8u64f_C1R(src1, (int)(len*sizeof(src1[0])),
|
||||
src2, (int)(len*sizeof(src2[0])),
|
||||
ippiSize(len, 1), &r))
|
||||
{
|
||||
CV_IMPL_ADD(CV_IMPL_IPP);
|
||||
return r;
|
||||
}
|
||||
setIppErrorStatus();
|
||||
}
|
||||
#endif
|
||||
int i = 0;
|
||||
|
||||
@@ -2968,10 +2974,16 @@ static double dotProd_8s(const schar* src1, const schar* src2, int len)
|
||||
static double dotProd_16u(const ushort* src1, const ushort* src2, int len)
|
||||
{
|
||||
#if (ARITHM_USE_IPP == 1)
|
||||
double r = 0;
|
||||
if (0 <= ippiDotProd_16u64f_C1R(src1, (int)(len*sizeof(src1[0])), src2, (int)(len*sizeof(src2[0])), ippiSize(len, 1), &r))
|
||||
return r;
|
||||
setIppErrorStatus();
|
||||
CV_IPP_CHECK()
|
||||
{
|
||||
double r = 0;
|
||||
if (0 <= ippiDotProd_16u64f_C1R(src1, (int)(len*sizeof(src1[0])), src2, (int)(len*sizeof(src2[0])), ippiSize(len, 1), &r))
|
||||
{
|
||||
CV_IMPL_ADD(CV_IMPL_IPP);
|
||||
return r;
|
||||
}
|
||||
setIppErrorStatus();
|
||||
}
|
||||
#endif
|
||||
return dotProd_(src1, src2, len);
|
||||
}
|
||||
@@ -2979,10 +2991,16 @@ static double dotProd_16u(const ushort* src1, const ushort* src2, int len)
|
||||
static double dotProd_16s(const short* src1, const short* src2, int len)
|
||||
{
|
||||
#if (ARITHM_USE_IPP == 1)
|
||||
double r = 0;
|
||||
if (0 <= ippiDotProd_16s64f_C1R(src1, (int)(len*sizeof(src1[0])), src2, (int)(len*sizeof(src2[0])), ippiSize(len, 1), &r))
|
||||
return r;
|
||||
setIppErrorStatus();
|
||||
CV_IPP_CHECK()
|
||||
{
|
||||
double r = 0;
|
||||
if (0 <= ippiDotProd_16s64f_C1R(src1, (int)(len*sizeof(src1[0])), src2, (int)(len*sizeof(src2[0])), ippiSize(len, 1), &r))
|
||||
{
|
||||
CV_IMPL_ADD(CV_IMPL_IPP);
|
||||
return r;
|
||||
}
|
||||
setIppErrorStatus();
|
||||
}
|
||||
#endif
|
||||
return dotProd_(src1, src2, len);
|
||||
}
|
||||
@@ -2990,10 +3008,16 @@ static double dotProd_16s(const short* src1, const short* src2, int len)
|
||||
static double dotProd_32s(const int* src1, const int* src2, int len)
|
||||
{
|
||||
#if (ARITHM_USE_IPP == 1)
|
||||
double r = 0;
|
||||
if (0 <= ippiDotProd_32s64f_C1R(src1, (int)(len*sizeof(src1[0])), src2, (int)(len*sizeof(src2[0])), ippiSize(len, 1), &r))
|
||||
return r;
|
||||
setIppErrorStatus();
|
||||
CV_IPP_CHECK()
|
||||
{
|
||||
double r = 0;
|
||||
if (0 <= ippiDotProd_32s64f_C1R(src1, (int)(len*sizeof(src1[0])), src2, (int)(len*sizeof(src2[0])), ippiSize(len, 1), &r))
|
||||
{
|
||||
CV_IMPL_ADD(CV_IMPL_IPP);
|
||||
return r;
|
||||
}
|
||||
setIppErrorStatus();
|
||||
}
|
||||
#endif
|
||||
return dotProd_(src1, src2, len);
|
||||
}
|
||||
@@ -3004,9 +3028,15 @@ static double dotProd_32f(const float* src1, const float* src2, int len)
|
||||
int i = 0;
|
||||
|
||||
#if (ARITHM_USE_IPP == 1)
|
||||
if (0 <= ippsDotProd_32f64f(src1, src2, len, &r))
|
||||
return r;
|
||||
setIppErrorStatus();
|
||||
CV_IPP_CHECK()
|
||||
{
|
||||
if (0 <= ippsDotProd_32f64f(src1, src2, len, &r))
|
||||
{
|
||||
CV_IMPL_ADD(CV_IMPL_IPP);
|
||||
return r;
|
||||
}
|
||||
setIppErrorStatus();
|
||||
}
|
||||
#elif CV_NEON
|
||||
int len0 = len & -4, blockSize0 = (1 << 13), blockSize;
|
||||
float32x4_t v_zero = vdupq_n_f32(0.0f);
|
||||
@@ -3035,10 +3065,16 @@ static double dotProd_32f(const float* src1, const float* src2, int len)
|
||||
static double dotProd_64f(const double* src1, const double* src2, int len)
|
||||
{
|
||||
#if (ARITHM_USE_IPP == 1)
|
||||
double r = 0;
|
||||
if (0 <= ippsDotProd_64f(src1, src2, len, &r))
|
||||
return r;
|
||||
setIppErrorStatus();
|
||||
CV_IPP_CHECK()
|
||||
{
|
||||
double r = 0;
|
||||
if (0 <= ippsDotProd_64f(src1, src2, len, &r))
|
||||
{
|
||||
CV_IMPL_ADD(CV_IMPL_IPP);
|
||||
return r;
|
||||
}
|
||||
setIppErrorStatus();
|
||||
}
|
||||
#endif
|
||||
return dotProd_(src1, src2, len);
|
||||
}
|
||||
|
Reference in New Issue
Block a user