Merge pull request #3987 from vpisarev:core_fixes_part_1
This commit is contained in:
commit
f49544f310
@ -229,6 +229,18 @@ if(WITH_FFMPEG)
|
||||
find_library(FFMPEG_UTIL_LIB "avutil" HINTS "${FFMPEG_LIB_DIR}")
|
||||
find_library(FFMPEG_SWSCALE_LIB "swscale" HINTS "${FFMPEG_LIB_DIR}")
|
||||
find_library(FFMPEG_RESAMPLE_LIB "avresample" HINTS "${FFMPEG_LIB_DIR}")
|
||||
if(FFMPEG_CODEC_LIB)
|
||||
set(HAVE_FFMPEG_CODEC 1)
|
||||
endif()
|
||||
if(FFMPEG_FORMAT_LIB)
|
||||
set(HAVE_FFMPEG_FORMAT 1)
|
||||
endif()
|
||||
if(FFMPEG_UTIL_LIB)
|
||||
set(HAVE_FFMPEG_UTIL 1)
|
||||
endif()
|
||||
if(FFMPEG_SWSCALE_LIB)
|
||||
set(HAVE_FFMPEG_SWSCALE 1)
|
||||
endif()
|
||||
if(FFMPEG_CODEC_LIB AND FFMPEG_FORMAT_LIB AND
|
||||
FFMPEG_UTIL_LIB AND FFMPEG_SWSCALE_LIB)
|
||||
set(ALIASOF_libavcodec_VERSION "Unknown")
|
||||
|
@ -163,7 +163,8 @@ public:
|
||||
CUDA_HOST_MEM = 8 << KIND_SHIFT,
|
||||
CUDA_GPU_MAT = 9 << KIND_SHIFT,
|
||||
UMAT =10 << KIND_SHIFT,
|
||||
STD_VECTOR_UMAT =11 << KIND_SHIFT
|
||||
STD_VECTOR_UMAT =11 << KIND_SHIFT,
|
||||
STD_BOOL_VECTOR =12 << KIND_SHIFT
|
||||
};
|
||||
|
||||
_InputArray();
|
||||
@ -173,6 +174,7 @@ public:
|
||||
_InputArray(const std::vector<Mat>& vec);
|
||||
template<typename _Tp> _InputArray(const Mat_<_Tp>& m);
|
||||
template<typename _Tp> _InputArray(const std::vector<_Tp>& vec);
|
||||
_InputArray(const std::vector<bool>& vec);
|
||||
template<typename _Tp> _InputArray(const std::vector<std::vector<_Tp> >& vec);
|
||||
template<typename _Tp> _InputArray(const std::vector<Mat_<_Tp> >& vec);
|
||||
template<typename _Tp> _InputArray(const _Tp* vec, int n);
|
||||
@ -284,6 +286,7 @@ public:
|
||||
_OutputArray(cuda::HostMem& cuda_mem);
|
||||
template<typename _Tp> _OutputArray(cudev::GpuMat_<_Tp>& m);
|
||||
template<typename _Tp> _OutputArray(std::vector<_Tp>& vec);
|
||||
_OutputArray(std::vector<bool>& vec);
|
||||
template<typename _Tp> _OutputArray(std::vector<std::vector<_Tp> >& vec);
|
||||
template<typename _Tp> _OutputArray(std::vector<Mat_<_Tp> >& vec);
|
||||
template<typename _Tp> _OutputArray(Mat_<_Tp>& m);
|
||||
@ -340,6 +343,7 @@ public:
|
||||
_InputOutputArray(cuda::HostMem& cuda_mem);
|
||||
template<typename _Tp> _InputOutputArray(cudev::GpuMat_<_Tp>& m);
|
||||
template<typename _Tp> _InputOutputArray(std::vector<_Tp>& vec);
|
||||
_InputOutputArray(std::vector<bool>& vec);
|
||||
template<typename _Tp> _InputOutputArray(std::vector<std::vector<_Tp> >& vec);
|
||||
template<typename _Tp> _InputOutputArray(std::vector<Mat_<_Tp> >& vec);
|
||||
template<typename _Tp> _InputOutputArray(Mat_<_Tp>& m);
|
||||
|
@ -77,6 +77,10 @@ template<typename _Tp> inline
|
||||
_InputArray::_InputArray(const std::vector<_Tp>& vec)
|
||||
{ init(FIXED_TYPE + STD_VECTOR + DataType<_Tp>::type + ACCESS_READ, &vec); }
|
||||
|
||||
inline
|
||||
_InputArray::_InputArray(const std::vector<bool>& vec)
|
||||
{ init(FIXED_TYPE + STD_BOOL_VECTOR + DataType<bool>::type + ACCESS_READ, &vec); }
|
||||
|
||||
template<typename _Tp> inline
|
||||
_InputArray::_InputArray(const std::vector<std::vector<_Tp> >& vec)
|
||||
{ init(FIXED_TYPE + STD_VECTOR_VECTOR + DataType<_Tp>::type + ACCESS_READ, &vec); }
|
||||
@ -140,6 +144,10 @@ template<typename _Tp> inline
|
||||
_OutputArray::_OutputArray(std::vector<_Tp>& vec)
|
||||
{ init(FIXED_TYPE + STD_VECTOR + DataType<_Tp>::type + ACCESS_WRITE, &vec); }
|
||||
|
||||
inline
|
||||
_OutputArray::_OutputArray(std::vector<bool>&)
|
||||
{ CV_Error(Error::StsUnsupportedFormat, "std::vector<bool> cannot be an output array\n"); }
|
||||
|
||||
template<typename _Tp> inline
|
||||
_OutputArray::_OutputArray(std::vector<std::vector<_Tp> >& vec)
|
||||
{ init(FIXED_TYPE + STD_VECTOR_VECTOR + DataType<_Tp>::type + ACCESS_WRITE, &vec); }
|
||||
@ -227,6 +235,9 @@ template<typename _Tp> inline
|
||||
_InputOutputArray::_InputOutputArray(std::vector<_Tp>& vec)
|
||||
{ init(FIXED_TYPE + STD_VECTOR + DataType<_Tp>::type + ACCESS_RW, &vec); }
|
||||
|
||||
inline _InputOutputArray::_InputOutputArray(std::vector<bool>&)
|
||||
{ CV_Error(Error::StsUnsupportedFormat, "std::vector<bool> cannot be an input/output array\n"); }
|
||||
|
||||
template<typename _Tp> inline
|
||||
_InputOutputArray::_InputOutputArray(std::vector<std::vector<_Tp> >& vec)
|
||||
{ init(FIXED_TYPE + STD_VECTOR_VECTOR + DataType<_Tp>::type + ACCESS_RW, &vec); }
|
||||
@ -1464,13 +1475,15 @@ Mat_<_Tp> Mat_<_Tp>::operator()( const Range* ranges ) const
|
||||
template<typename _Tp> inline
|
||||
_Tp* Mat_<_Tp>::operator [](int y)
|
||||
{
|
||||
return (_Tp*)ptr(y);
|
||||
CV_DbgAssert( 0 <= y && y < rows );
|
||||
return (_Tp*)(data + y*step.p[0]);
|
||||
}
|
||||
|
||||
template<typename _Tp> inline
|
||||
const _Tp* Mat_<_Tp>::operator [](int y) const
|
||||
{
|
||||
return (const _Tp*)ptr(y);
|
||||
CV_DbgAssert( 0 <= y && y < rows );
|
||||
return (const _Tp*)(data + y*step.p[0]);
|
||||
}
|
||||
|
||||
template<typename _Tp> inline
|
||||
|
@ -63,9 +63,11 @@ public:
|
||||
class CV_EXPORTS Function
|
||||
{
|
||||
public:
|
||||
virtual ~Function() {}
|
||||
virtual double calc(const double* x) const = 0;
|
||||
virtual void getGradient(const double* /*x*/,double* /*grad*/) {}
|
||||
virtual ~Function() {}
|
||||
virtual int getDims() const = 0;
|
||||
virtual double getGradientEps() const;
|
||||
virtual double calc(const double* x) const = 0;
|
||||
virtual void getGradient(const double* x,double* grad);
|
||||
};
|
||||
|
||||
/** @brief Getter for the optimized function.
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -46,6 +46,25 @@
|
||||
|
||||
namespace cv
|
||||
{
|
||||
double MinProblemSolver::Function::getGradientEps() const { return 1e-3; }
|
||||
void MinProblemSolver::Function::getGradient(const double* x, double* grad)
|
||||
{
|
||||
double eps = getGradientEps();
|
||||
int i, n = getDims();
|
||||
AutoBuffer<double> x_buf(n);
|
||||
double* x_ = x_buf;
|
||||
for( i = 0; i < n; i++ )
|
||||
x_[i] = x[i];
|
||||
for( i = 0; i < n; i++ )
|
||||
{
|
||||
x_[i] = x[i] + eps;
|
||||
double y1 = calc(x_);
|
||||
x_[i] = x[i] - eps;
|
||||
double y0 = calc(x_);
|
||||
grad[i] = (y1 - y0)/(2*eps);
|
||||
x_[i] = x[i];
|
||||
}
|
||||
}
|
||||
|
||||
#define SEC_METHOD_ITERATIONS 4
|
||||
#define INITIAL_SEC_METHOD_SIGMA 0.1
|
||||
|
@ -40,8 +40,13 @@
|
||||
//M*/
|
||||
#include "precomp.hpp"
|
||||
|
||||
#if 0
|
||||
#define dprintf(x) printf x
|
||||
#define print_matrix(x) print(x)
|
||||
#else
|
||||
#define dprintf(x)
|
||||
#define print_matrix(x)
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
||||
@ -51,14 +56,14 @@ Downhill Simplex method in OpenCV dev 3.0.0 getting this error:
|
||||
|
||||
OpenCV Error: Assertion failed (dims <= 2 && data && (unsigned)i0 < (unsigned)(s ize.p[0] * size.p[1])
|
||||
&& elemSize() == (((((DataType<_Tp>::type) & ((512 - 1) << 3)) >> 3) + 1) << ((((sizeof(size_t)/4+1)16384|0x3a50)
|
||||
>> ((DataType<_Tp>::typ e) & ((1 << 3) - 1))2) & 3))) in cv::Mat::at,
|
||||
>> ((DataType<_Tp>::typ e) & ((1 << 3) - 1))2) & 3))) in Mat::at,
|
||||
file C:\builds\master_PackSlave-w in32-vc12-shared\opencv\modules\core\include\opencv2/core/mat.inl.hpp, line 893
|
||||
|
||||
****Problem and Possible Fix*********************************************************************************************************
|
||||
|
||||
DownhillSolverImpl::innerDownhillSimplex something looks broken here:
|
||||
Mat_<double> coord_sum(1,ndim,0.0),buf(1,ndim,0.0),y(1,ndim,0.0);
|
||||
nfunk = 0;
|
||||
fcount = 0;
|
||||
for(i=0;i<ndim+1;++i)
|
||||
{
|
||||
y(i) = f->calc(p[i]);
|
||||
@ -135,275 +140,326 @@ multiple lines in three dimensions as not all lines intersect in three dimension
|
||||
namespace cv
|
||||
{
|
||||
|
||||
class DownhillSolverImpl : public DownhillSolver
|
||||
class DownhillSolverImpl : public DownhillSolver
|
||||
{
|
||||
public:
|
||||
DownhillSolverImpl()
|
||||
{
|
||||
public:
|
||||
void getInitStep(OutputArray step) const;
|
||||
void setInitStep(InputArray step);
|
||||
Ptr<Function> getFunction() const;
|
||||
void setFunction(const Ptr<Function>& f);
|
||||
TermCriteria getTermCriteria() const;
|
||||
DownhillSolverImpl();
|
||||
void setTermCriteria(const TermCriteria& termcrit);
|
||||
double minimize(InputOutputArray x);
|
||||
protected:
|
||||
Ptr<MinProblemSolver::Function> _Function;
|
||||
TermCriteria _termcrit;
|
||||
Mat _step;
|
||||
Mat_<double> buf_x;
|
||||
|
||||
private:
|
||||
inline void createInitialSimplex(Mat_<double>& simplex,Mat& step);
|
||||
inline double innerDownhillSimplex(cv::Mat_<double>& p,double MinRange,double MinError,int& nfunk,
|
||||
const Ptr<MinProblemSolver::Function>& f,int nmax);
|
||||
inline double tryNewPoint(Mat_<double>& p,Mat_<double>& y,Mat_<double>& coord_sum,const Ptr<MinProblemSolver::Function>& f,int ihi,
|
||||
double fac,Mat_<double>& ptry);
|
||||
};
|
||||
|
||||
double DownhillSolverImpl::tryNewPoint(
|
||||
Mat_<double>& p,
|
||||
Mat_<double>& y,
|
||||
Mat_<double>& coord_sum,
|
||||
const Ptr<MinProblemSolver::Function>& f,
|
||||
int ihi,
|
||||
double fac,
|
||||
Mat_<double>& ptry
|
||||
)
|
||||
{
|
||||
int ndim=p.cols;
|
||||
int j;
|
||||
double fac1,fac2,ytry;
|
||||
|
||||
fac1=(1.0-fac)/ndim;
|
||||
fac2=fac1-fac;
|
||||
for (j=0;j<ndim;j++)
|
||||
{
|
||||
ptry(j)=coord_sum(j)*fac1-p(ihi,j)*fac2;
|
||||
}
|
||||
ytry=f->calc(ptry.ptr<double>());
|
||||
if (ytry < y(ihi))
|
||||
{
|
||||
y(ihi)=ytry;
|
||||
for (j=0;j<ndim;j++)
|
||||
{
|
||||
coord_sum(j) += ptry(j)-p(ihi,j);
|
||||
p(ihi,j)=ptry(j);
|
||||
}
|
||||
}
|
||||
|
||||
return ytry;
|
||||
_Function=Ptr<Function>();
|
||||
_step=Mat_<double>();
|
||||
}
|
||||
|
||||
/*
|
||||
Performs the actual minimization of MinProblemSolver::Function f (after the initialization was done)
|
||||
|
||||
The matrix p[ndim+1][1..ndim] represents ndim+1 vertices that
|
||||
form a simplex - each row is an ndim vector.
|
||||
On output, nfunk gives the number of function evaluations taken.
|
||||
*/
|
||||
double DownhillSolverImpl::innerDownhillSimplex(
|
||||
cv::Mat_<double>& p,
|
||||
double MinRange,
|
||||
double MinError,
|
||||
int& nfunk,
|
||||
const Ptr<MinProblemSolver::Function>& f,
|
||||
int nmax
|
||||
)
|
||||
void getInitStep(OutputArray step) const { _step.copyTo(step); }
|
||||
void setInitStep(InputArray step)
|
||||
{
|
||||
int ndim=p.cols;
|
||||
double res;
|
||||
int i,ihi,ilo,inhi,j,mpts=ndim+1;
|
||||
double error, range,ysave,ytry;
|
||||
Mat_<double> coord_sum(1,ndim,0.0),buf(1,ndim,0.0),y(1,ndim+1,0.0);
|
||||
|
||||
nfunk = 0;
|
||||
|
||||
for(i=0;i<ndim+1;++i)
|
||||
{
|
||||
y(i) = f->calc(p[i]);
|
||||
}
|
||||
|
||||
nfunk = ndim+1;
|
||||
|
||||
reduce(p,coord_sum,0,CV_REDUCE_SUM);
|
||||
|
||||
for (;;)
|
||||
{
|
||||
ilo=0;
|
||||
/* find highest (worst), next-to-worst, and lowest
|
||||
(best) points by going through all of them. */
|
||||
ihi = y(0)>y(1) ? (inhi=1,0) : (inhi=0,1);
|
||||
for (i=0;i<mpts;i++)
|
||||
{
|
||||
if (y(i) <= y(ilo))
|
||||
ilo=i;
|
||||
if (y(i) > y(ihi))
|
||||
{
|
||||
inhi=ihi;
|
||||
ihi=i;
|
||||
}
|
||||
else if (y(i) > y(inhi) && i != ihi)
|
||||
inhi=i;
|
||||
}
|
||||
|
||||
/* check stop criterion */
|
||||
error=fabs(y(ihi)-y(ilo));
|
||||
range=0;
|
||||
for(i=0;i<ndim;++i)
|
||||
{
|
||||
double min = p(0,i);
|
||||
double max = p(0,i);
|
||||
double d;
|
||||
for(j=1;j<=ndim;++j)
|
||||
{
|
||||
if( min > p(j,i) ) min = p(j,i);
|
||||
if( max < p(j,i) ) max = p(j,i);
|
||||
}
|
||||
d = fabs(max-min);
|
||||
if(range < d) range = d;
|
||||
}
|
||||
|
||||
if(range <= MinRange || error <= MinError)
|
||||
{ /* Put best point and value in first slot. */
|
||||
std::swap(y(0),y(ilo));
|
||||
for (i=0;i<ndim;i++)
|
||||
{
|
||||
std::swap(p(0,i),p(ilo,i));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (nfunk >= nmax){
|
||||
dprintf(("nmax exceeded\n"));
|
||||
return y(ilo);
|
||||
}
|
||||
nfunk += 2;
|
||||
/*Begin a new iteration. First, reflect the worst point about the centroid of others */
|
||||
ytry = tryNewPoint(p,y,coord_sum,f,ihi,-1.0,buf);
|
||||
if (ytry <= y(ilo))
|
||||
{ /*If that's better than the best point, go twice as far in that direction*/
|
||||
ytry = tryNewPoint(p,y,coord_sum,f,ihi,2.0,buf);
|
||||
}
|
||||
else if (ytry >= y(inhi))
|
||||
{ /* The new point is worse than the second-highest, but better
|
||||
than the worst so do not go so far in that direction */
|
||||
ysave = y(ihi);
|
||||
ytry = tryNewPoint(p,y,coord_sum,f,ihi,0.5,buf);
|
||||
if (ytry >= ysave)
|
||||
{ /* Can't seem to improve things. Contract the simplex to good point
|
||||
in hope to find a simplex landscape. */
|
||||
for (i=0;i<mpts;i++)
|
||||
{
|
||||
if (i != ilo)
|
||||
{
|
||||
for (j=0;j<ndim;j++)
|
||||
{
|
||||
p(i,j) = coord_sum(j) = 0.5*(p(i,j)+p(ilo,j));
|
||||
}
|
||||
y(i)=f->calc(coord_sum.ptr<double>());
|
||||
}
|
||||
}
|
||||
nfunk += ndim;
|
||||
reduce(p,coord_sum,0,CV_REDUCE_SUM);
|
||||
}
|
||||
} else --(nfunk); /* correct nfunk */
|
||||
dprintf(("this is simplex on iteration %d\n",nfunk));
|
||||
print_matrix(p);
|
||||
} /* go to next iteration. */
|
||||
res = y(0);
|
||||
|
||||
return res;
|
||||
// set dimensionality and make a deep copy of step
|
||||
Mat m = step.getMat();
|
||||
dprintf(("m.cols=%d\nm.rows=%d\n", m.cols, m.rows));
|
||||
if( m.rows == 1 )
|
||||
m.copyTo(_step);
|
||||
else
|
||||
transpose(m, _step);
|
||||
}
|
||||
|
||||
void DownhillSolverImpl::createInitialSimplex(Mat_<double>& simplex,Mat& step){
|
||||
for(int i=1;i<=step.cols;++i)
|
||||
{
|
||||
simplex.row(0).copyTo(simplex.row(i));
|
||||
simplex(i,i-1)+= 0.5*step.at<double>(0,i-1);
|
||||
}
|
||||
simplex.row(0) -= 0.5*step;
|
||||
Ptr<MinProblemSolver::Function> getFunction() const { return _Function; }
|
||||
|
||||
dprintf(("this is simplex\n"));
|
||||
print_matrix(simplex);
|
||||
void setFunction(const Ptr<Function>& f) { _Function=f; }
|
||||
|
||||
TermCriteria getTermCriteria() const { return _termcrit; }
|
||||
|
||||
void setTermCriteria( const TermCriteria& termcrit )
|
||||
{
|
||||
CV_Assert( termcrit.type == (TermCriteria::MAX_ITER + TermCriteria::EPS) &&
|
||||
termcrit.epsilon > 0 &&
|
||||
termcrit.maxCount > 0 );
|
||||
_termcrit=termcrit;
|
||||
}
|
||||
|
||||
double DownhillSolverImpl::minimize(InputOutputArray x){
|
||||
double minimize( InputOutputArray x_ )
|
||||
{
|
||||
dprintf(("hi from minimize\n"));
|
||||
CV_Assert(_Function.empty()==false);
|
||||
CV_Assert( !_Function.empty() );
|
||||
CV_Assert( std::min(_step.cols, _step.rows) == 1 &&
|
||||
std::max(_step.cols, _step.rows) >= 2 &&
|
||||
_step.type() == CV_64FC1 );
|
||||
dprintf(("termcrit:\n\ttype: %d\n\tmaxCount: %d\n\tEPS: %g\n",_termcrit.type,_termcrit.maxCount,_termcrit.epsilon));
|
||||
dprintf(("step\n"));
|
||||
print_matrix(_step);
|
||||
|
||||
Mat x_mat=x.getMat();
|
||||
CV_Assert(MIN(x_mat.rows,x_mat.cols)==1);
|
||||
CV_Assert(MAX(x_mat.rows,x_mat.cols)==_step.cols);
|
||||
CV_Assert(x_mat.type()==CV_64FC1);
|
||||
|
||||
Mat_<double> proxy_x;
|
||||
|
||||
if(x_mat.rows>1){
|
||||
buf_x.create(1,_step.cols);
|
||||
Mat_<double> proxy(_step.cols,1,buf_x.ptr<double>());
|
||||
x_mat.copyTo(proxy);
|
||||
proxy_x=buf_x;
|
||||
}else{
|
||||
proxy_x=x_mat;
|
||||
}
|
||||
|
||||
int count=0;
|
||||
int ndim=_step.cols;
|
||||
Mat_<double> simplex=Mat_<double>(ndim+1,ndim,0.0);
|
||||
|
||||
simplex.row(0).copyTo(proxy_x);
|
||||
createInitialSimplex(simplex,_step);
|
||||
double res = innerDownhillSimplex(
|
||||
simplex,_termcrit.epsilon, _termcrit.epsilon, count,_Function,_termcrit.maxCount);
|
||||
simplex.row(0).copyTo(proxy_x);
|
||||
Mat x = x_.getMat(), simplex;
|
||||
|
||||
createInitialSimplex(x, simplex, _step);
|
||||
int count = 0;
|
||||
double res = innerDownhillSimplex(simplex,_termcrit.epsilon, _termcrit.epsilon,
|
||||
count, _termcrit.maxCount);
|
||||
dprintf(("%d iterations done\n",count));
|
||||
|
||||
if(x_mat.rows>1){
|
||||
Mat(x_mat.rows, 1, CV_64F, proxy_x.ptr<double>()).copyTo(x);
|
||||
if( !x.empty() )
|
||||
{
|
||||
Mat simplex_0m(x.rows, x.cols, CV_64F, simplex.ptr<double>());
|
||||
simplex_0m.convertTo(x, x.type());
|
||||
}
|
||||
else
|
||||
{
|
||||
int x_type = x_.fixedType() ? x_.type() : CV_64F;
|
||||
simplex.row(0).convertTo(x_, x_type);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
DownhillSolverImpl::DownhillSolverImpl(){
|
||||
_Function=Ptr<Function>();
|
||||
_step=Mat_<double>();
|
||||
}
|
||||
Ptr<MinProblemSolver::Function> DownhillSolverImpl::getFunction()const{
|
||||
return _Function;
|
||||
}
|
||||
void DownhillSolverImpl::setFunction(const Ptr<Function>& f){
|
||||
_Function=f;
|
||||
}
|
||||
TermCriteria DownhillSolverImpl::getTermCriteria()const{
|
||||
return _termcrit;
|
||||
}
|
||||
void DownhillSolverImpl::setTermCriteria(const TermCriteria& termcrit){
|
||||
CV_Assert(termcrit.type==(TermCriteria::MAX_ITER+TermCriteria::EPS) && termcrit.epsilon>0 && termcrit.maxCount>0);
|
||||
_termcrit=termcrit;
|
||||
}
|
||||
// both minRange & minError are specified by termcrit.epsilon; In addition, user may specify the number of iterations that the algorithm does.
|
||||
Ptr<DownhillSolver> DownhillSolver::create(const Ptr<MinProblemSolver::Function>& f, InputArray initStep, TermCriteria termcrit){
|
||||
Ptr<DownhillSolver> DS = makePtr<DownhillSolverImpl>();
|
||||
DS->setFunction(f);
|
||||
DS->setInitStep(initStep);
|
||||
DS->setTermCriteria(termcrit);
|
||||
return DS;
|
||||
}
|
||||
void DownhillSolverImpl::getInitStep(OutputArray step)const{
|
||||
_step.copyTo(step);
|
||||
}
|
||||
void DownhillSolverImpl::setInitStep(InputArray step){
|
||||
//set dimensionality and make a deep copy of step
|
||||
Mat m=step.getMat();
|
||||
dprintf(("m.cols=%d\nm.rows=%d\n",m.cols,m.rows));
|
||||
CV_Assert(MIN(m.cols,m.rows)==1 && m.type()==CV_64FC1);
|
||||
if(m.rows==1){
|
||||
m.copyTo(_step);
|
||||
}else{
|
||||
transpose(m,_step);
|
||||
protected:
|
||||
Ptr<MinProblemSolver::Function> _Function;
|
||||
TermCriteria _termcrit;
|
||||
Mat _step;
|
||||
|
||||
inline void updateCoordSum(const Mat& p, Mat& coord_sum)
|
||||
{
|
||||
int i, j, m = p.rows, n = p.cols;
|
||||
double* coord_sum_ = coord_sum.ptr<double>();
|
||||
CV_Assert( coord_sum.cols == n && coord_sum.rows == 1 );
|
||||
|
||||
for( j = 0; j < n; j++ )
|
||||
coord_sum_[j] = 0.;
|
||||
|
||||
for( i = 0; i < m; i++ )
|
||||
{
|
||||
const double* p_i = p.ptr<double>(i);
|
||||
for( j = 0; j < n; j++ )
|
||||
coord_sum_[j] += p_i[j];
|
||||
}
|
||||
|
||||
dprintf(("\nupdated coord sum:\n"));
|
||||
print_matrix(coord_sum);
|
||||
|
||||
}
|
||||
|
||||
inline void createInitialSimplex( const Mat& x0, Mat& simplex, Mat& step )
|
||||
{
|
||||
int i, j, ndim = step.cols;
|
||||
CV_Assert( _Function->getDims() == ndim );
|
||||
Mat x = x0;
|
||||
if( x0.empty() )
|
||||
x = Mat::zeros(1, ndim, CV_64F);
|
||||
CV_Assert( (x.cols == 1 && x.rows == ndim) || (x.cols == ndim && x.rows == 1) );
|
||||
CV_Assert( x.type() == CV_32F || x.type() == CV_64F );
|
||||
|
||||
simplex.create(ndim + 1, ndim, CV_64F);
|
||||
Mat simplex_0m(x.rows, x.cols, CV_64F, simplex.ptr<double>());
|
||||
|
||||
x.convertTo(simplex_0m, CV_64F);
|
||||
double* simplex_0 = simplex.ptr<double>();
|
||||
const double* step_ = step.ptr<double>();
|
||||
for( i = 1; i <= ndim; i++ )
|
||||
{
|
||||
double* simplex_i = simplex.ptr<double>(i);
|
||||
for( j = 0; j < ndim; j++ )
|
||||
simplex_i[j] = simplex_0[j];
|
||||
simplex_i[i-1] += 0.5*step_[i-1];
|
||||
}
|
||||
for( j = 0; j < ndim; j++ )
|
||||
simplex_0[j] -= 0.5*step_[j];
|
||||
|
||||
dprintf(("\nthis is simplex\n"));
|
||||
print_matrix(simplex);
|
||||
}
|
||||
|
||||
/*
|
||||
Performs the actual minimization of MinProblemSolver::Function f (after the initialization was done)
|
||||
|
||||
The matrix p[ndim+1][1..ndim] represents ndim+1 vertices that
|
||||
form a simplex - each row is an ndim vector.
|
||||
On output, fcount gives the number of function evaluations taken.
|
||||
*/
|
||||
double innerDownhillSimplex( Mat& p, double MinRange, double MinError, int& fcount, int nmax )
|
||||
{
|
||||
int i, j, ndim = p.cols;
|
||||
Mat coord_sum(1, ndim, CV_64F), buf(1, ndim, CV_64F), y(1, ndim+1, CV_64F);
|
||||
double* y_ = y.ptr<double>();
|
||||
|
||||
fcount = ndim+1;
|
||||
for( i = 0; i <= ndim; i++ )
|
||||
y_[i] = calc_f(p.ptr<double>(i));
|
||||
|
||||
updateCoordSum(p, coord_sum);
|
||||
|
||||
for (;;)
|
||||
{
|
||||
// find highest (worst), next-to-worst, and lowest
|
||||
// (best) points by going through all of them.
|
||||
int ilo = 0, ihi, inhi;
|
||||
if( y_[0] > y_[1] )
|
||||
{
|
||||
ihi = 0; inhi = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
ihi = 1; inhi = 0;
|
||||
}
|
||||
for( i = 0; i <= ndim; i++ )
|
||||
{
|
||||
double yval = y_[i];
|
||||
if (yval <= y_[ilo])
|
||||
ilo = i;
|
||||
if (yval > y_[ihi])
|
||||
{
|
||||
inhi = ihi;
|
||||
ihi = i;
|
||||
}
|
||||
else if (yval > y_[inhi] && i != ihi)
|
||||
inhi = i;
|
||||
}
|
||||
CV_Assert( ihi != inhi );
|
||||
if( ilo == inhi || ilo == ihi )
|
||||
{
|
||||
for( i = 0; i <= ndim; i++ )
|
||||
{
|
||||
double yval = y_[i];
|
||||
if( yval == y_[ilo] && i != ihi && i != inhi )
|
||||
{
|
||||
ilo = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
dprintf(("\nthis is y on iteration %d:\n",fcount));
|
||||
print_matrix(y);
|
||||
|
||||
// check stop criterion
|
||||
double error = fabs(y_[ihi] - y_[ilo]);
|
||||
double range = 0;
|
||||
for( j = 0; j < ndim; j++ )
|
||||
{
|
||||
double minval, maxval;
|
||||
minval = maxval = p.at<double>(0, j);
|
||||
for( i = 1; i <= ndim; i++ )
|
||||
{
|
||||
double pval = p.at<double>(i, j);
|
||||
minval = std::min(minval, pval);
|
||||
maxval = std::max(maxval, pval);
|
||||
}
|
||||
range = std::max(range, fabs(maxval - minval));
|
||||
}
|
||||
|
||||
if( range <= MinRange || error <= MinError || fcount >= nmax )
|
||||
{
|
||||
// Put best point and value in first slot.
|
||||
std::swap(y_[0], y_[ilo]);
|
||||
for( j = 0; j < ndim; j++ )
|
||||
{
|
||||
std::swap(p.at<double>(0, j), p.at<double>(ilo, j));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
double y_lo = y_[ilo], y_nhi = y_[inhi], y_hi = y_[ihi];
|
||||
// Begin a new iteration. First, reflect the worst point about the centroid of others
|
||||
double alpha = -1.0;
|
||||
double y_alpha = tryNewPoint(p, coord_sum, ihi, alpha, buf, fcount);
|
||||
|
||||
dprintf(("\ny_lo=%g, y_nhi=%g, y_hi=%g, y_alpha=%g, p_alpha:\n", y_lo, y_nhi, y_hi, y_alpha));
|
||||
print_matrix(buf);
|
||||
|
||||
if( y_alpha < y_nhi )
|
||||
{
|
||||
if( y_alpha < y_lo )
|
||||
{
|
||||
// If that's better than the best point, go twice as far in that direction
|
||||
double beta = -2.0;
|
||||
double y_beta = tryNewPoint(p, coord_sum, ihi, beta, buf, fcount);
|
||||
dprintf(("\ny_beta=%g, p_beta:\n", y_beta));
|
||||
print_matrix(buf);
|
||||
if( y_beta < y_alpha )
|
||||
{
|
||||
alpha = beta;
|
||||
y_alpha = y_beta;
|
||||
}
|
||||
}
|
||||
replacePoint(p, coord_sum, y, ihi, alpha, y_alpha);
|
||||
}
|
||||
else
|
||||
{
|
||||
// The new point is worse than the second-highest,
|
||||
// do not go so far in that direction
|
||||
double gamma = 0.5;
|
||||
double y_gamma = tryNewPoint(p, coord_sum, ihi, gamma, buf, fcount);
|
||||
dprintf(("\ny_gamma=%g, p_gamma:\n", y_gamma));
|
||||
print_matrix(buf);
|
||||
if( y_gamma < y_hi )
|
||||
replacePoint(p, coord_sum, y, ihi, gamma, y_gamma);
|
||||
else
|
||||
{
|
||||
// Can't seem to improve things.
|
||||
// Contract the simplex to good point
|
||||
// in hope to find a simplex landscape.
|
||||
for( i = 0; i <= ndim; i++ )
|
||||
{
|
||||
if (i != ilo)
|
||||
{
|
||||
for( j = 0; j < ndim; j++ )
|
||||
p.at<double>(i, j) = 0.5*(p.at<double>(i, j) + p.at<double>(ilo, j));
|
||||
y_[i] = calc_f(p.ptr<double>(i));
|
||||
}
|
||||
}
|
||||
fcount += ndim;
|
||||
updateCoordSum(p, coord_sum);
|
||||
}
|
||||
}
|
||||
dprintf(("\nthis is simplex on iteration %d\n",fcount));
|
||||
print_matrix(p);
|
||||
}
|
||||
return y_[0];
|
||||
}
|
||||
|
||||
inline double calc_f(const double* ptr)
|
||||
{
|
||||
double res = _Function->calc(ptr);
|
||||
CV_Assert( !cvIsNaN(res) && !cvIsInf(res) );
|
||||
return res;
|
||||
}
|
||||
|
||||
double tryNewPoint( Mat& p, Mat& coord_sum, int ihi, double alpha_, Mat& ptry, int& fcount )
|
||||
{
|
||||
int j, ndim = p.cols;
|
||||
|
||||
double alpha = (1.0 - alpha_)/ndim;
|
||||
double beta = alpha - alpha_;
|
||||
double* p_ihi = p.ptr<double>(ihi);
|
||||
double* ptry_ = ptry.ptr<double>();
|
||||
double* coord_sum_ = coord_sum.ptr<double>();
|
||||
|
||||
for( j = 0; j < ndim; j++ )
|
||||
ptry_[j] = coord_sum_[j]*alpha - p_ihi[j]*beta;
|
||||
|
||||
fcount++;
|
||||
return calc_f(ptry_);
|
||||
}
|
||||
|
||||
void replacePoint( Mat& p, Mat& coord_sum, Mat& y, int ihi, double alpha_, double ytry )
|
||||
{
|
||||
int j, ndim = p.cols;
|
||||
|
||||
double alpha = (1.0 - alpha_)/ndim;
|
||||
double beta = alpha - alpha_;
|
||||
double* p_ihi = p.ptr<double>(ihi);
|
||||
double* coord_sum_ = coord_sum.ptr<double>();
|
||||
|
||||
for( j = 0; j < ndim; j++ )
|
||||
p_ihi[j] = coord_sum_[j]*alpha - p_ihi[j]*beta;
|
||||
y.at<double>(ihi) = ytry;
|
||||
updateCoordSum(p, coord_sum);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// both minRange & minError are specified by termcrit.epsilon;
|
||||
// In addition, user may specify the number of iterations that the algorithm does.
|
||||
Ptr<DownhillSolver> DownhillSolver::create( const Ptr<MinProblemSolver::Function>& f,
|
||||
InputArray initStep, TermCriteria termcrit )
|
||||
{
|
||||
Ptr<DownhillSolver> DS = makePtr<DownhillSolverImpl>();
|
||||
DS->setFunction(f);
|
||||
DS->setInitStep(initStep);
|
||||
DS->setTermCriteria(termcrit);
|
||||
return DS;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -60,7 +60,6 @@ namespace cv
|
||||
#undef USE_IPP_DFT
|
||||
#endif
|
||||
|
||||
|
||||
/****************************************************************************************\
|
||||
Discrete Fourier Transform
|
||||
\****************************************************************************************/
|
||||
@ -1090,11 +1089,12 @@ RealDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab,
|
||||
}
|
||||
}
|
||||
|
||||
if( complex_output && (n & 1) == 0 )
|
||||
if( complex_output && ((n & 1) == 0 || n == 1))
|
||||
{
|
||||
dst[-1] = dst[0];
|
||||
dst[0] = 0;
|
||||
dst[n] = 0;
|
||||
if( n > 1 )
|
||||
dst[n] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
@ -2426,6 +2426,47 @@ static bool ocl_dft_amdfft(InputArray _src, OutputArray _dst, int flags)
|
||||
|
||||
#endif // HAVE_CLAMDFFT
|
||||
|
||||
namespace cv
|
||||
{
|
||||
static void complementComplexOutput(Mat& dst, int len, int dft_dims)
|
||||
{
|
||||
int i, n = dst.cols;
|
||||
size_t elem_size = dst.elemSize1();
|
||||
if( elem_size == sizeof(float) )
|
||||
{
|
||||
float* p0 = dst.ptr<float>();
|
||||
size_t dstep = dst.step/sizeof(p0[0]);
|
||||
for( i = 0; i < len; i++ )
|
||||
{
|
||||
float* p = p0 + dstep*i;
|
||||
float* q = dft_dims == 1 || i == 0 || i*2 == len ? p : p0 + dstep*(len-i);
|
||||
|
||||
for( int j = 1; j < (n+1)/2; j++ )
|
||||
{
|
||||
p[(n-j)*2] = q[j*2];
|
||||
p[(n-j)*2+1] = -q[j*2+1];
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
double* p0 = dst.ptr<double>();
|
||||
size_t dstep = dst.step/sizeof(p0[0]);
|
||||
for( i = 0; i < len; i++ )
|
||||
{
|
||||
double* p = p0 + dstep*i;
|
||||
double* q = dft_dims == 1 || i == 0 || i*2 == len ? p : p0 + dstep*(len-i);
|
||||
|
||||
for( int j = 1; j < (n+1)/2; j++ )
|
||||
{
|
||||
p[(n-j)*2] = q[j*2];
|
||||
p[(n-j)*2+1] = -q[j*2+1];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
|
||||
{
|
||||
#ifdef HAVE_CLAMDFFT
|
||||
@ -2705,7 +2746,11 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
|
||||
}
|
||||
|
||||
if( stage != 1 )
|
||||
{
|
||||
if( !inv && real_transform && dst.channels() == 2 )
|
||||
complementComplexOutput(dst, nonzero_rows, 1);
|
||||
break;
|
||||
}
|
||||
src = dst;
|
||||
}
|
||||
else
|
||||
@ -2847,41 +2892,7 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
|
||||
if( stage != 0 )
|
||||
{
|
||||
if( !inv && real_transform && dst.channels() == 2 && len > 1 )
|
||||
{
|
||||
int n = dst.cols;
|
||||
if( elem_size == (int)sizeof(float) )
|
||||
{
|
||||
float* p0 = dst.ptr<float>();
|
||||
size_t dstep = dst.step/sizeof(p0[0]);
|
||||
for( i = 0; i < len; i++ )
|
||||
{
|
||||
float* p = p0 + dstep*i;
|
||||
float* q = i == 0 || i*2 == len ? p : p0 + dstep*(len-i);
|
||||
|
||||
for( int j = 1; j < (n+1)/2; j++ )
|
||||
{
|
||||
p[(n-j)*2] = q[j*2];
|
||||
p[(n-j)*2+1] = -q[j*2+1];
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
double* p0 = dst.ptr<double>();
|
||||
size_t dstep = dst.step/sizeof(p0[0]);
|
||||
for( i = 0; i < len; i++ )
|
||||
{
|
||||
double* p = p0 + dstep*i;
|
||||
double* q = i == 0 || i*2 == len ? p : p0 + dstep*(len-i);
|
||||
|
||||
for( int j = 1; j < (n+1)/2; j++ )
|
||||
{
|
||||
p[(n-j)*2] = q[j*2];
|
||||
p[(n-j)*2+1] = -q[j*2+1];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
complementComplexOutput(dst, len, 2);
|
||||
break;
|
||||
}
|
||||
src = dst;
|
||||
|
@ -502,7 +502,7 @@ JacobiSVDImpl_(_Tp* At, size_t astep, _Tp* _W, _Tp* Vt, size_t vstep,
|
||||
{
|
||||
sd = i < n ? W[i] : 0;
|
||||
|
||||
while( sd <= minval )
|
||||
for( int ii = 0; ii < 100 && sd <= minval; ii++ )
|
||||
{
|
||||
// if we got a zero singular value, then in order to get the corresponding left singular vector
|
||||
// we generate a random vector, project it to the previously computed left singular vectors,
|
||||
@ -527,7 +527,7 @@ JacobiSVDImpl_(_Tp* At, size_t astep, _Tp* _W, _Tp* Vt, size_t vstep,
|
||||
At[i*astep + k] = t;
|
||||
asum += std::abs(t);
|
||||
}
|
||||
asum = asum ? 1/asum : 0;
|
||||
asum = asum > eps*100 ? 1/asum : 0;
|
||||
for( k = 0; k < m; k++ )
|
||||
At[i*astep + k] *= asum;
|
||||
}
|
||||
@ -541,7 +541,7 @@ JacobiSVDImpl_(_Tp* At, size_t astep, _Tp* _W, _Tp* Vt, size_t vstep,
|
||||
sd = std::sqrt(sd);
|
||||
}
|
||||
|
||||
s = (_Tp)(1/sd);
|
||||
s = (_Tp)(sd > minval ? 1/sd : 0.);
|
||||
for( k = 0; k < m; k++ )
|
||||
At[i*astep + k] *= s;
|
||||
}
|
||||
|
@ -43,6 +43,7 @@
|
||||
|
||||
#include "precomp.hpp"
|
||||
#include "opencl_kernels_core.hpp"
|
||||
#include <limits>
|
||||
|
||||
namespace cv
|
||||
{
|
||||
@ -889,38 +890,41 @@ struct iPow_SIMD
|
||||
}
|
||||
};
|
||||
|
||||
#if CV_NEON
|
||||
#if CV_SIMD128
|
||||
|
||||
template <>
|
||||
struct iPow_SIMD<uchar, int>
|
||||
{
|
||||
int operator() ( const uchar * src, uchar * dst, int len, int power)
|
||||
int operator() ( const uchar * src, uchar * dst, int len, int power )
|
||||
{
|
||||
int i = 0;
|
||||
uint32x4_t v_1 = vdupq_n_u32(1u);
|
||||
v_uint32x4 v_1 = v_setall_u32(1u);
|
||||
|
||||
for ( ; i <= len - 8; i += 8)
|
||||
{
|
||||
uint32x4_t v_a1 = v_1, v_a2 = v_1;
|
||||
uint16x8_t v_src = vmovl_u8(vld1_u8(src + i));
|
||||
uint32x4_t v_b1 = vmovl_u16(vget_low_u16(v_src)), v_b2 = vmovl_u16(vget_high_u16(v_src));
|
||||
v_uint32x4 v_a1 = v_1, v_a2 = v_1;
|
||||
v_uint16x8 v = v_load_expand(src + i);
|
||||
v_uint32x4 v_b1, v_b2;
|
||||
v_expand(v, v_b1, v_b2);
|
||||
int p = power;
|
||||
|
||||
while( p > 1 )
|
||||
{
|
||||
if (p & 1)
|
||||
{
|
||||
v_a1 = vmulq_u32(v_a1, v_b1);
|
||||
v_a2 = vmulq_u32(v_a2, v_b2);
|
||||
v_a1 *= v_b1;
|
||||
v_a2 *= v_b2;
|
||||
}
|
||||
v_b1 = vmulq_u32(v_b1, v_b1);
|
||||
v_b2 = vmulq_u32(v_b2, v_b2);
|
||||
v_b1 *= v_b1;
|
||||
v_b2 *= v_b2;
|
||||
p >>= 1;
|
||||
}
|
||||
|
||||
v_a1 = vmulq_u32(v_a1, v_b1);
|
||||
v_a2 = vmulq_u32(v_a2, v_b2);
|
||||
vst1_u8(dst + i, vqmovn_u16(vcombine_u16(vqmovn_u32(v_a1), vqmovn_u32(v_a2))));
|
||||
v_a1 *= v_b1;
|
||||
v_a2 *= v_b2;
|
||||
|
||||
v = v_pack(v_a1, v_a2);
|
||||
v_pack_store(dst + i, v);
|
||||
}
|
||||
|
||||
return i;
|
||||
@ -933,30 +937,33 @@ struct iPow_SIMD<schar, int>
|
||||
int operator() ( const schar * src, schar * dst, int len, int power)
|
||||
{
|
||||
int i = 0;
|
||||
int32x4_t v_1 = vdupq_n_s32(1);
|
||||
v_int32x4 v_1 = v_setall_s32(1);
|
||||
|
||||
for ( ; i <= len - 8; i += 8)
|
||||
{
|
||||
int32x4_t v_a1 = v_1, v_a2 = v_1;
|
||||
int16x8_t v_src = vmovl_s8(vld1_s8(src + i));
|
||||
int32x4_t v_b1 = vmovl_s16(vget_low_s16(v_src)), v_b2 = vmovl_s16(vget_high_s16(v_src));
|
||||
v_int32x4 v_a1 = v_1, v_a2 = v_1;
|
||||
v_int16x8 v = v_load_expand(src + i);
|
||||
v_int32x4 v_b1, v_b2;
|
||||
v_expand(v, v_b1, v_b2);
|
||||
int p = power;
|
||||
|
||||
while( p > 1 )
|
||||
{
|
||||
if (p & 1)
|
||||
{
|
||||
v_a1 = vmulq_s32(v_a1, v_b1);
|
||||
v_a2 = vmulq_s32(v_a2, v_b2);
|
||||
v_a1 *= v_b1;
|
||||
v_a2 *= v_b2;
|
||||
}
|
||||
v_b1 = vmulq_s32(v_b1, v_b1);
|
||||
v_b2 = vmulq_s32(v_b2, v_b2);
|
||||
v_b1 *= v_b1;
|
||||
v_b2 *= v_b2;
|
||||
p >>= 1;
|
||||
}
|
||||
|
||||
v_a1 = vmulq_s32(v_a1, v_b1);
|
||||
v_a2 = vmulq_s32(v_a2, v_b2);
|
||||
vst1_s8(dst + i, vqmovn_s16(vcombine_s16(vqmovn_s32(v_a1), vqmovn_s32(v_a2))));
|
||||
v_a1 *= v_b1;
|
||||
v_a2 *= v_b2;
|
||||
|
||||
v = v_pack(v_a1, v_a2);
|
||||
v_pack_store(dst + i, v);
|
||||
}
|
||||
|
||||
return i;
|
||||
@ -969,30 +976,33 @@ struct iPow_SIMD<ushort, int>
|
||||
int operator() ( const ushort * src, ushort * dst, int len, int power)
|
||||
{
|
||||
int i = 0;
|
||||
uint32x4_t v_1 = vdupq_n_u32(1u);
|
||||
v_uint32x4 v_1 = v_setall_u32(1u);
|
||||
|
||||
for ( ; i <= len - 8; i += 8)
|
||||
{
|
||||
uint32x4_t v_a1 = v_1, v_a2 = v_1;
|
||||
uint16x8_t v_src = vld1q_u16(src + i);
|
||||
uint32x4_t v_b1 = vmovl_u16(vget_low_u16(v_src)), v_b2 = vmovl_u16(vget_high_u16(v_src));
|
||||
v_uint32x4 v_a1 = v_1, v_a2 = v_1;
|
||||
v_uint16x8 v = v_load(src + i);
|
||||
v_uint32x4 v_b1, v_b2;
|
||||
v_expand(v, v_b1, v_b2);
|
||||
int p = power;
|
||||
|
||||
while( p > 1 )
|
||||
{
|
||||
if (p & 1)
|
||||
{
|
||||
v_a1 = vmulq_u32(v_a1, v_b1);
|
||||
v_a2 = vmulq_u32(v_a2, v_b2);
|
||||
v_a1 *= v_b1;
|
||||
v_a2 *= v_b2;
|
||||
}
|
||||
v_b1 = vmulq_u32(v_b1, v_b1);
|
||||
v_b2 = vmulq_u32(v_b2, v_b2);
|
||||
v_b1 *= v_b1;
|
||||
v_b2 *= v_b2;
|
||||
p >>= 1;
|
||||
}
|
||||
|
||||
v_a1 = vmulq_u32(v_a1, v_b1);
|
||||
v_a2 = vmulq_u32(v_a2, v_b2);
|
||||
vst1q_u16(dst + i, vcombine_u16(vqmovn_u32(v_a1), vqmovn_u32(v_a2)));
|
||||
v_a1 *= v_b1;
|
||||
v_a2 *= v_b2;
|
||||
|
||||
v = v_pack(v_a1, v_a2);
|
||||
v_store(dst + i, v);
|
||||
}
|
||||
|
||||
return i;
|
||||
@ -1005,60 +1015,70 @@ struct iPow_SIMD<short, int>
|
||||
int operator() ( const short * src, short * dst, int len, int power)
|
||||
{
|
||||
int i = 0;
|
||||
int32x4_t v_1 = vdupq_n_s32(1);
|
||||
v_int32x4 v_1 = v_setall_s32(1);
|
||||
|
||||
for ( ; i <= len - 8; i += 8)
|
||||
{
|
||||
int32x4_t v_a1 = v_1, v_a2 = v_1;
|
||||
int16x8_t v_src = vld1q_s16(src + i);
|
||||
int32x4_t v_b1 = vmovl_s16(vget_low_s16(v_src)), v_b2 = vmovl_s16(vget_high_s16(v_src));
|
||||
v_int32x4 v_a1 = v_1, v_a2 = v_1;
|
||||
v_int16x8 v = v_load(src + i);
|
||||
v_int32x4 v_b1, v_b2;
|
||||
v_expand(v, v_b1, v_b2);
|
||||
int p = power;
|
||||
|
||||
while( p > 1 )
|
||||
{
|
||||
if (p & 1)
|
||||
{
|
||||
v_a1 = vmulq_s32(v_a1, v_b1);
|
||||
v_a2 = vmulq_s32(v_a2, v_b2);
|
||||
v_a1 *= v_b1;
|
||||
v_a2 *= v_b2;
|
||||
}
|
||||
v_b1 = vmulq_s32(v_b1, v_b1);
|
||||
v_b2 = vmulq_s32(v_b2, v_b2);
|
||||
v_b1 *= v_b1;
|
||||
v_b2 *= v_b2;
|
||||
p >>= 1;
|
||||
}
|
||||
|
||||
v_a1 = vmulq_s32(v_a1, v_b1);
|
||||
v_a2 = vmulq_s32(v_a2, v_b2);
|
||||
vst1q_s16(dst + i, vcombine_s16(vqmovn_s32(v_a1), vqmovn_s32(v_a2)));
|
||||
v_a1 *= v_b1;
|
||||
v_a2 *= v_b2;
|
||||
|
||||
v = v_pack(v_a1, v_a2);
|
||||
v_store(dst + i, v);
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template <>
|
||||
struct iPow_SIMD<int, int>
|
||||
{
|
||||
int operator() ( const int * src, int * dst, int len, int power)
|
||||
{
|
||||
int i = 0;
|
||||
int32x4_t v_1 = vdupq_n_s32(1);
|
||||
v_int32x4 v_1 = v_setall_s32(1);
|
||||
|
||||
for ( ; i <= len - 4; i += 4)
|
||||
for ( ; i <= len - 8; i += 8)
|
||||
{
|
||||
int32x4_t v_b = vld1q_s32(src + i), v_a = v_1;
|
||||
v_int32x4 v_a1 = v_1, v_a2 = v_1;
|
||||
v_int32x4 v_b1 = v_load(src + i), v_b2 = v_load(src + i + 4);
|
||||
int p = power;
|
||||
|
||||
while( p > 1 )
|
||||
{
|
||||
if (p & 1)
|
||||
v_a = vmulq_s32(v_a, v_b);
|
||||
v_b = vmulq_s32(v_b, v_b);
|
||||
{
|
||||
v_a1 *= v_b1;
|
||||
v_a2 *= v_b2;
|
||||
}
|
||||
v_b1 *= v_b1;
|
||||
v_b2 *= v_b2;
|
||||
p >>= 1;
|
||||
}
|
||||
|
||||
v_a = vmulq_s32(v_a, v_b);
|
||||
vst1q_s32(dst + i, v_a);
|
||||
v_a1 *= v_b1;
|
||||
v_a2 *= v_b2;
|
||||
|
||||
v_store(dst + i, v_a1);
|
||||
v_store(dst + i + 4, v_a2);
|
||||
}
|
||||
|
||||
return i;
|
||||
@ -1071,42 +1091,143 @@ struct iPow_SIMD<float, float>
|
||||
int operator() ( const float * src, float * dst, int len, int power)
|
||||
{
|
||||
int i = 0;
|
||||
float32x4_t v_1 = vdupq_n_f32(1.0f);
|
||||
v_float32x4 v_1 = v_setall_f32(1.f);
|
||||
|
||||
for ( ; i <= len - 4; i += 4)
|
||||
for ( ; i <= len - 8; i += 8)
|
||||
{
|
||||
float32x4_t v_b = vld1q_f32(src + i), v_a = v_1;
|
||||
int p = power;
|
||||
v_float32x4 v_a1 = v_1, v_a2 = v_1;
|
||||
v_float32x4 v_b1 = v_load(src + i), v_b2 = v_load(src + i + 4);
|
||||
int p = std::abs(power);
|
||||
if( power < 0 )
|
||||
{
|
||||
v_b1 = v_1 / v_b1;
|
||||
v_b2 = v_1 / v_b2;
|
||||
}
|
||||
|
||||
while( p > 1 )
|
||||
{
|
||||
if (p & 1)
|
||||
v_a = vmulq_f32(v_a, v_b);
|
||||
v_b = vmulq_f32(v_b, v_b);
|
||||
{
|
||||
v_a1 *= v_b1;
|
||||
v_a2 *= v_b2;
|
||||
}
|
||||
v_b1 *= v_b1;
|
||||
v_b2 *= v_b2;
|
||||
p >>= 1;
|
||||
}
|
||||
|
||||
v_a = vmulq_f32(v_a, v_b);
|
||||
vst1q_f32(dst + i, v_a);
|
||||
v_a1 *= v_b1;
|
||||
v_a2 *= v_b2;
|
||||
|
||||
v_store(dst + i, v_a1);
|
||||
v_store(dst + i + 4, v_a2);
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
};
|
||||
|
||||
#if CV_SIMD128_64F
|
||||
template <>
|
||||
struct iPow_SIMD<double, double>
|
||||
{
|
||||
int operator() ( const double * src, double * dst, int len, int power)
|
||||
{
|
||||
int i = 0;
|
||||
v_float64x2 v_1 = v_setall_f64(1.);
|
||||
|
||||
for ( ; i <= len - 4; i += 4)
|
||||
{
|
||||
v_float64x2 v_a1 = v_1, v_a2 = v_1;
|
||||
v_float64x2 v_b1 = v_load(src + i), v_b2 = v_load(src + i + 2);
|
||||
int p = std::abs(power);
|
||||
if( power < 0 )
|
||||
{
|
||||
v_b1 = v_1 / v_b1;
|
||||
v_b2 = v_1 / v_b2;
|
||||
}
|
||||
|
||||
while( p > 1 )
|
||||
{
|
||||
if (p & 1)
|
||||
{
|
||||
v_a1 *= v_b1;
|
||||
v_a2 *= v_b2;
|
||||
}
|
||||
v_b1 *= v_b1;
|
||||
v_b2 *= v_b2;
|
||||
p >>= 1;
|
||||
}
|
||||
|
||||
v_a1 *= v_b1;
|
||||
v_a2 *= v_b2;
|
||||
|
||||
v_store(dst + i, v_a1);
|
||||
v_store(dst + i + 2, v_a2);
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
template<typename T, typename WT>
|
||||
static void
|
||||
iPow_( const T* src, T* dst, int len, int power )
|
||||
iPow_i( const T* src, T* dst, int len, int power )
|
||||
{
|
||||
iPow_SIMD<T, WT> vop;
|
||||
int i = vop(src, dst, len, power);
|
||||
if( power < 0 )
|
||||
{
|
||||
T tab[5] =
|
||||
{
|
||||
power == -1 ? saturate_cast<T>(-1) : 0, (power & 1) ? -1 : 1,
|
||||
std::numeric_limits<T>::max(), 1, power == -1 ? 1 : 0
|
||||
};
|
||||
for( int i = 0; i < len; i++ )
|
||||
{
|
||||
T val = src[i];
|
||||
dst[i] = cv_abs(val) <= 2 ? tab[val + 2] : (T)0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
iPow_SIMD<T, WT> vop;
|
||||
int i = vop(src, dst, len, power);
|
||||
|
||||
for( ; i < len; i++ )
|
||||
{
|
||||
WT a = 1, b = src[i];
|
||||
int p = power;
|
||||
while( p > 1 )
|
||||
{
|
||||
if( p & 1 )
|
||||
a *= b;
|
||||
b *= b;
|
||||
p >>= 1;
|
||||
}
|
||||
|
||||
a *= b;
|
||||
dst[i] = saturate_cast<T>(a);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static void
|
||||
iPow_f( const T* src, T* dst, int len, int power0 )
|
||||
{
|
||||
iPow_SIMD<T, T> vop;
|
||||
int i = vop(src, dst, len, power0);
|
||||
int power = std::abs(power0);
|
||||
|
||||
for( ; i < len; i++ )
|
||||
{
|
||||
WT a = 1, b = src[i];
|
||||
T a = 1, b = src[i];
|
||||
int p = power;
|
||||
if( power0 < 0 )
|
||||
b = 1/b;
|
||||
|
||||
while( p > 1 )
|
||||
{
|
||||
if( p & 1 )
|
||||
@ -1116,44 +1237,43 @@ iPow_( const T* src, T* dst, int len, int power )
|
||||
}
|
||||
|
||||
a *= b;
|
||||
dst[i] = saturate_cast<T>(a);
|
||||
dst[i] = a;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void iPow8u(const uchar* src, uchar* dst, int len, int power)
|
||||
{
|
||||
iPow_<uchar, int>(src, dst, len, power);
|
||||
iPow_i<uchar, unsigned>(src, dst, len, power);
|
||||
}
|
||||
|
||||
static void iPow8s(const schar* src, schar* dst, int len, int power)
|
||||
{
|
||||
iPow_<schar, int>(src, dst, len, power);
|
||||
iPow_i<schar, int>(src, dst, len, power);
|
||||
}
|
||||
|
||||
static void iPow16u(const ushort* src, ushort* dst, int len, int power)
|
||||
{
|
||||
iPow_<ushort, int>(src, dst, len, power);
|
||||
iPow_i<ushort, unsigned>(src, dst, len, power);
|
||||
}
|
||||
|
||||
static void iPow16s(const short* src, short* dst, int len, int power)
|
||||
{
|
||||
iPow_<short, int>(src, dst, len, power);
|
||||
iPow_i<short, int>(src, dst, len, power);
|
||||
}
|
||||
|
||||
static void iPow32s(const int* src, int* dst, int len, int power)
|
||||
{
|
||||
iPow_<int, int>(src, dst, len, power);
|
||||
iPow_i<int, int>(src, dst, len, power);
|
||||
}
|
||||
|
||||
static void iPow32f(const float* src, float* dst, int len, int power)
|
||||
{
|
||||
iPow_<float, float>(src, dst, len, power);
|
||||
iPow_f<float>(src, dst, len, power);
|
||||
}
|
||||
|
||||
static void iPow64f(const double* src, double* dst, int len, int power)
|
||||
{
|
||||
iPow_<double, double>(src, dst, len, power);
|
||||
iPow_f<double>(src, dst, len, power);
|
||||
}
|
||||
|
||||
|
||||
@ -1176,14 +1296,25 @@ static bool ocl_pow(InputArray _src, double power, OutputArray _dst,
|
||||
bool doubleSupport = d.doubleFPConfig() > 0;
|
||||
|
||||
_dst.createSameSize(_src, type);
|
||||
if (is_ipower && (ipower == 0 || ipower == 1))
|
||||
if (is_ipower)
|
||||
{
|
||||
if (ipower == 0)
|
||||
{
|
||||
_dst.setTo(Scalar::all(1));
|
||||
else if (ipower == 1)
|
||||
return true;
|
||||
}
|
||||
if (ipower == 1)
|
||||
{
|
||||
_src.copyTo(_dst);
|
||||
|
||||
return true;
|
||||
return true;
|
||||
}
|
||||
if( ipower < 0 )
|
||||
{
|
||||
if( depth == CV_32F || depth == CV_64F )
|
||||
is_ipower = false;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (depth == CV_64F && !doubleSupport)
|
||||
@ -1233,20 +1364,11 @@ void pow( InputArray _src, double power, OutputArray _dst )
|
||||
{
|
||||
int type = _src.type(), depth = CV_MAT_DEPTH(type),
|
||||
cn = CV_MAT_CN(type), ipower = cvRound(power);
|
||||
bool is_ipower = fabs(ipower - power) < DBL_EPSILON, same = false,
|
||||
bool is_ipower = fabs(ipower - power) < DBL_EPSILON,
|
||||
useOpenCL = _dst.isUMat() && _src.dims() <= 2;
|
||||
|
||||
if( is_ipower && !(ocl::Device::getDefault().isIntel() && useOpenCL && depth != CV_64F))
|
||||
{
|
||||
if( ipower < 0 )
|
||||
{
|
||||
divide( Scalar::all(1), _src, _dst );
|
||||
if( ipower == -1 )
|
||||
return;
|
||||
ipower = -ipower;
|
||||
same = true;
|
||||
}
|
||||
|
||||
switch( ipower )
|
||||
{
|
||||
case 0:
|
||||
@ -1257,59 +1379,18 @@ void pow( InputArray _src, double power, OutputArray _dst )
|
||||
_src.copyTo(_dst);
|
||||
return;
|
||||
case 2:
|
||||
#if defined(HAVE_IPP)
|
||||
CV_IPP_CHECK()
|
||||
{
|
||||
if (depth == CV_32F && !same && ( (_src.dims() <= 2 && !ocl::useOpenCL()) ||
|
||||
(_src.dims() > 2 && _src.isContinuous() && _dst.isContinuous()) ))
|
||||
{
|
||||
Mat src = _src.getMat();
|
||||
_dst.create( src.dims, src.size, type );
|
||||
Mat dst = _dst.getMat();
|
||||
|
||||
Size size = src.size();
|
||||
int srcstep = (int)src.step, dststep = (int)dst.step, esz = CV_ELEM_SIZE(type);
|
||||
if (src.isContinuous() && dst.isContinuous())
|
||||
{
|
||||
size.width = (int)src.total();
|
||||
size.height = 1;
|
||||
srcstep = dststep = (int)src.total() * esz;
|
||||
}
|
||||
size.width *= cn;
|
||||
|
||||
IppStatus status = ippiSqr_32f_C1R(src.ptr<Ipp32f>(), srcstep, dst.ptr<Ipp32f>(), dststep, ippiSize(size.width, size.height));
|
||||
|
||||
if (status >= 0)
|
||||
{
|
||||
CV_IMPL_ADD(CV_IMPL_IPP);
|
||||
return;
|
||||
}
|
||||
setIppErrorStatus();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (same)
|
||||
multiply(_dst, _dst, _dst);
|
||||
else
|
||||
multiply(_src, _src, _dst);
|
||||
multiply(_src, _src, _dst);
|
||||
return;
|
||||
}
|
||||
}
|
||||
else
|
||||
CV_Assert( depth == CV_32F || depth == CV_64F );
|
||||
|
||||
CV_OCL_RUN(useOpenCL,
|
||||
ocl_pow(same ? _dst : _src, power, _dst, is_ipower, ipower))
|
||||
CV_OCL_RUN(useOpenCL, ocl_pow(_src, power, _dst, is_ipower, ipower))
|
||||
|
||||
Mat src, dst;
|
||||
if (same)
|
||||
src = dst = _dst.getMat();
|
||||
else
|
||||
{
|
||||
src = _src.getMat();
|
||||
_dst.create( src.dims, src.size, type );
|
||||
dst = _dst.getMat();
|
||||
}
|
||||
Mat src = _src.getMat();
|
||||
_dst.create( src.dims, src.size, type );
|
||||
Mat dst = _dst.getMat();
|
||||
|
||||
const Mat* arrays[] = {&src, &dst, 0};
|
||||
uchar* ptrs[2];
|
||||
@ -1335,52 +1416,103 @@ void pow( InputArray _src, double power, OutputArray _dst )
|
||||
}
|
||||
else
|
||||
{
|
||||
#if defined(HAVE_IPP)
|
||||
CV_IPP_CHECK()
|
||||
{
|
||||
if (src.isContinuous() && dst.isContinuous())
|
||||
{
|
||||
IppStatus status = depth == CV_32F ?
|
||||
ippsPowx_32f_A21(src.ptr<Ipp32f>(), (Ipp32f)power, dst.ptr<Ipp32f>(), (Ipp32s)(src.total() * cn)) :
|
||||
ippsPowx_64f_A50(src.ptr<Ipp64f>(), power, dst.ptr<Ipp64f>(), (Ipp32s)(src.total() * cn));
|
||||
|
||||
if (status >= 0)
|
||||
{
|
||||
CV_IMPL_ADD(CV_IMPL_IPP);
|
||||
return;
|
||||
}
|
||||
setIppErrorStatus();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
int j, k, blockSize = std::min(len, ((BLOCK_SIZE + cn-1)/cn)*cn);
|
||||
size_t esz1 = src.elemSize1();
|
||||
AutoBuffer<uchar> buf;
|
||||
Cv32suf inf32, nan32;
|
||||
Cv64suf inf64, nan64;
|
||||
float* fbuf = 0;
|
||||
double* dbuf = 0;
|
||||
inf32.i = 0x7f800000;
|
||||
nan32.i = 0x7fffffff;
|
||||
inf64.i = CV_BIG_INT(0x7FF0000000000000);
|
||||
nan64.i = CV_BIG_INT(0x7FFFFFFFFFFFFFFF);
|
||||
|
||||
if( src.ptr() == dst.ptr() )
|
||||
{
|
||||
buf.allocate(blockSize*esz1);
|
||||
fbuf = (float*)(uchar*)buf;
|
||||
dbuf = (double*)(uchar*)buf;
|
||||
}
|
||||
|
||||
for( size_t i = 0; i < it.nplanes; i++, ++it )
|
||||
{
|
||||
for( j = 0; j < len; j += blockSize )
|
||||
{
|
||||
int bsz = std::min(len - j, blockSize);
|
||||
|
||||
#if defined(HAVE_IPP)
|
||||
CV_IPP_CHECK()
|
||||
{
|
||||
IppStatus status = depth == CV_32F ?
|
||||
ippsPowx_32f_A21((const float*)ptrs[0], (float)power, (float*)ptrs[1], bsz) :
|
||||
ippsPowx_64f_A50((const double*)ptrs[0], (double)power, (double*)ptrs[1], bsz);
|
||||
|
||||
if (status >= 0)
|
||||
{
|
||||
CV_IMPL_ADD(CV_IMPL_IPP);
|
||||
ptrs[0] += bsz*esz1;
|
||||
ptrs[1] += bsz*esz1;
|
||||
continue;
|
||||
}
|
||||
setIppErrorStatus();
|
||||
}
|
||||
#endif
|
||||
|
||||
if( depth == CV_32F )
|
||||
{
|
||||
const float* x = (const float*)ptrs[0];
|
||||
float* x0 = (float*)ptrs[0];
|
||||
float* x = fbuf ? fbuf : x0;
|
||||
float* y = (float*)ptrs[1];
|
||||
|
||||
if( x != x0 )
|
||||
memcpy(x, x0, bsz*esz1);
|
||||
|
||||
Log_32f(x, y, bsz);
|
||||
for( k = 0; k < bsz; k++ )
|
||||
y[k] = (float)(y[k]*power);
|
||||
Exp_32f(y, y, bsz);
|
||||
for( k = 0; k < bsz; k++ )
|
||||
{
|
||||
if( x0[k] <= 0 )
|
||||
{
|
||||
if( x0[k] == 0.f )
|
||||
{
|
||||
if( power < 0 )
|
||||
y[k] = inf32.f;
|
||||
}
|
||||
else
|
||||
y[k] = nan32.f;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const double* x = (const double*)ptrs[0];
|
||||
double* x0 = (double*)ptrs[0];
|
||||
double* x = dbuf ? dbuf : x0;
|
||||
double* y = (double*)ptrs[1];
|
||||
|
||||
if( x != x0 )
|
||||
memcpy(x, x0, bsz*esz1);
|
||||
|
||||
Log_64f(x, y, bsz);
|
||||
for( k = 0; k < bsz; k++ )
|
||||
y[k] *= power;
|
||||
Exp_64f(y, y, bsz);
|
||||
|
||||
for( k = 0; k < bsz; k++ )
|
||||
{
|
||||
if( x0[k] <= 0 )
|
||||
{
|
||||
if( x0[k] == 0. )
|
||||
{
|
||||
if( power < 0 )
|
||||
y[k] = inf64.f;
|
||||
}
|
||||
else
|
||||
y[k] = nan64.f;
|
||||
}
|
||||
}
|
||||
}
|
||||
ptrs[0] += bsz*esz1;
|
||||
ptrs[1] += bsz*esz1;
|
||||
|
@ -1195,7 +1195,7 @@ void cv::gemm( InputArray matA, InputArray matB, double alpha,
|
||||
GEMMBlockMulFunc blockMulFunc;
|
||||
GEMMStoreFunc storeFunc;
|
||||
Mat *matD = &D, tmat;
|
||||
int tmat_size = 0;
|
||||
size_t tmat_size = 0;
|
||||
const uchar* Cdata = C.data;
|
||||
size_t Cstep = C.data ? (size_t)C.step : 0;
|
||||
AutoBuffer<uchar> buf;
|
||||
@ -1228,7 +1228,7 @@ void cv::gemm( InputArray matA, InputArray matB, double alpha,
|
||||
|
||||
if( D.data == A.data || D.data == B.data )
|
||||
{
|
||||
tmat_size = d_size.width*d_size.height*CV_ELEM_SIZE(type);
|
||||
tmat_size = (size_t)d_size.width*d_size.height*CV_ELEM_SIZE(type);
|
||||
// Allocate tmat later, once the size of buf is known
|
||||
matD = &tmat;
|
||||
}
|
||||
@ -1319,7 +1319,7 @@ void cv::gemm( InputArray matA, InputArray matB, double alpha,
|
||||
int is_b_t = flags & GEMM_2_T;
|
||||
int elem_size = CV_ELEM_SIZE(type);
|
||||
int dk0_1, dk0_2;
|
||||
int a_buf_size = 0, b_buf_size, d_buf_size;
|
||||
size_t a_buf_size = 0, b_buf_size, d_buf_size;
|
||||
uchar* a_buf = 0;
|
||||
uchar* b_buf = 0;
|
||||
uchar* d_buf = 0;
|
||||
@ -1360,12 +1360,12 @@ void cv::gemm( InputArray matA, InputArray matB, double alpha,
|
||||
dn0 = block_size / dk0;
|
||||
|
||||
dk0_1 = (dn0+dn0/8+2) & -2;
|
||||
b_buf_size = (dk0+dk0/8+1)*dk0_1*elem_size;
|
||||
d_buf_size = (dk0+dk0/8+1)*dk0_1*work_elem_size;
|
||||
b_buf_size = (size_t)(dk0+dk0/8+1)*dk0_1*elem_size;
|
||||
d_buf_size = (size_t)(dk0+dk0/8+1)*dk0_1*work_elem_size;
|
||||
|
||||
if( is_a_t )
|
||||
{
|
||||
a_buf_size = (dm0+dm0/8+1)*((dk0+dk0/8+2)&-2)*elem_size;
|
||||
a_buf_size = (size_t)(dm0+dm0/8+1)*((dk0+dk0/8+2)&-2)*elem_size;
|
||||
flags &= ~GEMM_1_T;
|
||||
}
|
||||
|
||||
|
@ -222,11 +222,10 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
MatAllocator* Mat::getStdAllocator()
|
||||
{
|
||||
static MatAllocator * allocator = new StdMatAllocator();
|
||||
return allocator;
|
||||
static StdMatAllocator allocator;
|
||||
return &allocator;
|
||||
}
|
||||
|
||||
void swap( Mat& a, Mat& b )
|
||||
@ -1155,6 +1154,21 @@ Mat _InputArray::getMat_(int i) const
|
||||
return !v.empty() ? Mat(size(), t, (void*)&v[0]) : Mat();
|
||||
}
|
||||
|
||||
if( k == STD_BOOL_VECTOR )
|
||||
{
|
||||
CV_Assert( i < 0 );
|
||||
int t = CV_8U;
|
||||
const std::vector<bool>& v = *(const std::vector<bool>*)obj;
|
||||
int j, n = (int)v.size();
|
||||
if( n == 0 )
|
||||
return Mat();
|
||||
Mat m(1, n, t);
|
||||
uchar* dst = m.data;
|
||||
for( j = 0; j < n; j++ )
|
||||
dst[j] = (uchar)v[j];
|
||||
return m;
|
||||
}
|
||||
|
||||
if( k == NONE )
|
||||
return Mat();
|
||||
|
||||
@ -1482,6 +1496,13 @@ Size _InputArray::size(int i) const
|
||||
return szb == szi ? Size((int)szb, 1) : Size((int)(szb/CV_ELEM_SIZE(flags)), 1);
|
||||
}
|
||||
|
||||
if( k == STD_BOOL_VECTOR )
|
||||
{
|
||||
CV_Assert( i < 0 );
|
||||
const std::vector<bool>& v = *(const std::vector<bool>*)obj;
|
||||
return Size((int)v.size(), 1);
|
||||
}
|
||||
|
||||
if( k == NONE )
|
||||
return Size();
|
||||
|
||||
@ -1662,7 +1683,7 @@ int _InputArray::dims(int i) const
|
||||
return 2;
|
||||
}
|
||||
|
||||
if( k == STD_VECTOR )
|
||||
if( k == STD_VECTOR || k == STD_BOOL_VECTOR )
|
||||
{
|
||||
CV_Assert( i < 0 );
|
||||
return 2;
|
||||
@ -1774,7 +1795,7 @@ int _InputArray::type(int i) const
|
||||
if( k == EXPR )
|
||||
return ((const MatExpr*)obj)->type();
|
||||
|
||||
if( k == MATX || k == STD_VECTOR || k == STD_VECTOR_VECTOR )
|
||||
if( k == MATX || k == STD_VECTOR || k == STD_VECTOR_VECTOR || k == STD_BOOL_VECTOR )
|
||||
return CV_MAT_TYPE(flags);
|
||||
|
||||
if( k == NONE )
|
||||
@ -1849,6 +1870,12 @@ bool _InputArray::empty() const
|
||||
return v.empty();
|
||||
}
|
||||
|
||||
if( k == STD_BOOL_VECTOR )
|
||||
{
|
||||
const std::vector<bool>& v = *(const std::vector<bool>*)obj;
|
||||
return v.empty();
|
||||
}
|
||||
|
||||
if( k == NONE )
|
||||
return true;
|
||||
|
||||
@ -1893,7 +1920,8 @@ bool _InputArray::isContinuous(int i) const
|
||||
if( k == UMAT )
|
||||
return i < 0 ? ((const UMat*)obj)->isContinuous() : true;
|
||||
|
||||
if( k == EXPR || k == MATX || k == STD_VECTOR || k == NONE || k == STD_VECTOR_VECTOR)
|
||||
if( k == EXPR || k == MATX || k == STD_VECTOR ||
|
||||
k == NONE || k == STD_VECTOR_VECTOR || k == STD_BOOL_VECTOR )
|
||||
return true;
|
||||
|
||||
if( k == STD_VECTOR_MAT )
|
||||
@ -1924,7 +1952,8 @@ bool _InputArray::isSubmatrix(int i) const
|
||||
if( k == UMAT )
|
||||
return i < 0 ? ((const UMat*)obj)->isSubmatrix() : false;
|
||||
|
||||
if( k == EXPR || k == MATX || k == STD_VECTOR || k == NONE || k == STD_VECTOR_VECTOR)
|
||||
if( k == EXPR || k == MATX || k == STD_VECTOR ||
|
||||
k == NONE || k == STD_VECTOR_VECTOR || k == STD_BOOL_VECTOR )
|
||||
return false;
|
||||
|
||||
if( k == STD_VECTOR_MAT )
|
||||
@ -1962,7 +1991,8 @@ size_t _InputArray::offset(int i) const
|
||||
return ((const UMat*)obj)->offset;
|
||||
}
|
||||
|
||||
if( k == EXPR || k == MATX || k == STD_VECTOR || k == NONE || k == STD_VECTOR_VECTOR)
|
||||
if( k == EXPR || k == MATX || k == STD_VECTOR ||
|
||||
k == NONE || k == STD_VECTOR_VECTOR || k == STD_BOOL_VECTOR )
|
||||
return 0;
|
||||
|
||||
if( k == STD_VECTOR_MAT )
|
||||
@ -2009,7 +2039,8 @@ size_t _InputArray::step(int i) const
|
||||
return ((const UMat*)obj)->step;
|
||||
}
|
||||
|
||||
if( k == EXPR || k == MATX || k == STD_VECTOR || k == NONE || k == STD_VECTOR_VECTOR)
|
||||
if( k == EXPR || k == MATX || k == STD_VECTOR ||
|
||||
k == NONE || k == STD_VECTOR_VECTOR || k == STD_BOOL_VECTOR )
|
||||
return 0;
|
||||
|
||||
if( k == STD_VECTOR_MAT )
|
||||
@ -2044,7 +2075,7 @@ void _InputArray::copyTo(const _OutputArray& arr) const
|
||||
|
||||
if( k == NONE )
|
||||
arr.release();
|
||||
else if( k == MAT || k == MATX || k == STD_VECTOR )
|
||||
else if( k == MAT || k == MATX || k == STD_VECTOR || k == STD_BOOL_VECTOR )
|
||||
{
|
||||
Mat m = getMat();
|
||||
m.copyTo(arr);
|
||||
@ -2069,7 +2100,7 @@ void _InputArray::copyTo(const _OutputArray& arr, const _InputArray & mask) cons
|
||||
|
||||
if( k == NONE )
|
||||
arr.release();
|
||||
else if( k == MAT || k == MATX || k == STD_VECTOR )
|
||||
else if( k == MAT || k == MATX || k == STD_VECTOR || k == STD_BOOL_VECTOR )
|
||||
{
|
||||
Mat m = getMat();
|
||||
m.copyTo(arr, mask);
|
||||
|
@ -574,6 +574,16 @@ __kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
|
||||
#pragma unroll
|
||||
for (int i=x; i<cols; i+=block_size)
|
||||
dst[i] = SCALE_VAL(smem[i], scale);
|
||||
#ifdef REAL_INPUT
|
||||
#ifdef COMPLEX_OUTPUT
|
||||
#ifdef IS_1D
|
||||
for(int i=x+1; i < (dst_cols+1)/2; i+=block_size)
|
||||
{
|
||||
dst[dst_cols-i] = (CT)(SCALE_VAL(smem[i].x, scale), SCALE_VAL(-smem[i].y, scale));
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#else
|
||||
// pack row to CCS
|
||||
__local FT* smem_1cn = (__local FT*) smem;
|
||||
|
@ -151,39 +151,46 @@ template<typename T> struct OpMax
|
||||
T operator ()(const T a, const T b) const { return std::max(a, b); }
|
||||
};
|
||||
|
||||
inline Size getContinuousSize_( int flags, int cols, int rows, int widthScale )
|
||||
{
|
||||
int64 sz = (int64)cols * rows * widthScale;
|
||||
return (flags & Mat::CONTINUOUS_FLAG) != 0 &&
|
||||
(int)sz == sz ? Size((int)sz, 1) : Size(cols * widthScale, rows);
|
||||
}
|
||||
|
||||
inline Size getContinuousSize( const Mat& m1, int widthScale=1 )
|
||||
{
|
||||
return m1.isContinuous() ? Size(m1.cols*m1.rows*widthScale, 1) :
|
||||
Size(m1.cols*widthScale, m1.rows);
|
||||
return getContinuousSize_(m1.flags,
|
||||
m1.cols, m1.rows, widthScale);
|
||||
}
|
||||
|
||||
inline Size getContinuousSize( const Mat& m1, const Mat& m2, int widthScale=1 )
|
||||
{
|
||||
return (m1.flags & m2.flags & Mat::CONTINUOUS_FLAG) != 0 ?
|
||||
Size(m1.cols*m1.rows*widthScale, 1) : Size(m1.cols*widthScale, m1.rows);
|
||||
return getContinuousSize_(m1.flags & m2.flags,
|
||||
m1.cols, m1.rows, widthScale);
|
||||
}
|
||||
|
||||
inline Size getContinuousSize( const Mat& m1, const Mat& m2,
|
||||
const Mat& m3, int widthScale=1 )
|
||||
{
|
||||
return (m1.flags & m2.flags & m3.flags & Mat::CONTINUOUS_FLAG) != 0 ?
|
||||
Size(m1.cols*m1.rows*widthScale, 1) : Size(m1.cols*widthScale, m1.rows);
|
||||
return getContinuousSize_(m1.flags & m2.flags & m3.flags,
|
||||
m1.cols, m1.rows, widthScale);
|
||||
}
|
||||
|
||||
inline Size getContinuousSize( const Mat& m1, const Mat& m2,
|
||||
const Mat& m3, const Mat& m4,
|
||||
int widthScale=1 )
|
||||
{
|
||||
return (m1.flags & m2.flags & m3.flags & m4.flags & Mat::CONTINUOUS_FLAG) != 0 ?
|
||||
Size(m1.cols*m1.rows*widthScale, 1) : Size(m1.cols*widthScale, m1.rows);
|
||||
return getContinuousSize_(m1.flags & m2.flags & m3.flags & m4.flags,
|
||||
m1.cols, m1.rows, widthScale);
|
||||
}
|
||||
|
||||
inline Size getContinuousSize( const Mat& m1, const Mat& m2,
|
||||
const Mat& m3, const Mat& m4,
|
||||
const Mat& m5, int widthScale=1 )
|
||||
{
|
||||
return (m1.flags & m2.flags & m3.flags & m4.flags & m5.flags & Mat::CONTINUOUS_FLAG) != 0 ?
|
||||
Size(m1.cols*m1.rows*widthScale, 1) : Size(m1.cols*widthScale, m1.rows);
|
||||
return getContinuousSize_(m1.flags & m2.flags & m3.flags & m4.flags & m5.flags,
|
||||
m1.cols, m1.rows, widthScale);
|
||||
}
|
||||
|
||||
struct NoVec
|
||||
@ -290,4 +297,6 @@ extern bool __termination; // skip some cleanups, because process is terminating
|
||||
|
||||
}
|
||||
|
||||
#include "opencv2/hal/intrin.hpp"
|
||||
|
||||
#endif /*_CXCORE_INTERNAL_H_*/
|
||||
|
@ -748,29 +748,35 @@ namespace cv
|
||||
{
|
||||
|
||||
template<typename T> static void
|
||||
randShuffle_( Mat& _arr, RNG& rng, double iterFactor )
|
||||
randShuffle_( Mat& _arr, RNG& rng, double )
|
||||
{
|
||||
int sz = _arr.rows*_arr.cols, iters = cvRound(iterFactor*sz);
|
||||
unsigned sz = (unsigned)_arr.total();
|
||||
if( _arr.isContinuous() )
|
||||
{
|
||||
T* arr = _arr.ptr<T>();
|
||||
for( int i = 0; i < iters; i++ )
|
||||
for( unsigned i = 0; i < sz; i++ )
|
||||
{
|
||||
int j = (unsigned)rng % sz, k = (unsigned)rng % sz;
|
||||
std::swap( arr[j], arr[k] );
|
||||
unsigned j = (unsigned)rng % sz;
|
||||
std::swap( arr[j], arr[i] );
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
CV_Assert( _arr.dims <= 2 );
|
||||
uchar* data = _arr.ptr();
|
||||
size_t step = _arr.step;
|
||||
int rows = _arr.rows;
|
||||
int cols = _arr.cols;
|
||||
for( int i = 0; i < iters; i++ )
|
||||
for( int i0 = 0; i0 < rows; i0++ )
|
||||
{
|
||||
int j1 = (unsigned)rng % sz, k1 = (unsigned)rng % sz;
|
||||
int j0 = j1/cols, k0 = k1/cols;
|
||||
j1 -= j0*cols; k1 -= k0*cols;
|
||||
std::swap( ((T*)(data + step*j0))[j1], ((T*)(data + step*k0))[k1] );
|
||||
T* p = _arr.ptr<T>(i0);
|
||||
for( int j0 = 0; j0 < cols; j0++ )
|
||||
{
|
||||
unsigned k1 = (unsigned)rng % sz;
|
||||
int i1 = (int)(k1 / cols);
|
||||
int j1 = (int)(k1 - (unsigned)i1*(unsigned)cols);
|
||||
std::swap( p[j0], ((T*)(data + step*i1))[j1] );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -3826,6 +3826,11 @@ void cv::findNonZero( InputArray _src, OutputArray _idx )
|
||||
Mat src = _src.getMat();
|
||||
CV_Assert( src.type() == CV_8UC1 );
|
||||
int n = countNonZero(src);
|
||||
if( n == 0 )
|
||||
{
|
||||
_idx.release();
|
||||
return;
|
||||
}
|
||||
if( _idx.kind() == _InputArray::MAT && !_idx.getMatRef().isContinuous() )
|
||||
_idx.release();
|
||||
_idx.create(n, 1, CV_32SC2);
|
||||
|
@ -1791,3 +1791,28 @@ INSTANTIATE_TEST_CASE_P(Arithm, SubtractOutputMatNotEmpty, testing::Combine(
|
||||
testing::Values(perf::MatType(CV_8UC1), CV_8UC3, CV_8UC4, CV_16SC1, CV_16SC3),
|
||||
testing::Values(-1, CV_16S, CV_32S, CV_32F),
|
||||
testing::Bool()));
|
||||
|
||||
|
||||
TEST(Core_FindNonZero, singular)
|
||||
{
|
||||
Mat img(10, 10, CV_8U, Scalar::all(0));
|
||||
vector<Point> pts, pts2(10);
|
||||
findNonZero(img, pts);
|
||||
findNonZero(img, pts2);
|
||||
ASSERT_TRUE(pts.empty() && pts2.empty());
|
||||
}
|
||||
|
||||
TEST(Core_BoolVector, support)
|
||||
{
|
||||
std::vector<bool> test;
|
||||
int i, n = 205;
|
||||
int nz = 0;
|
||||
test.resize(n);
|
||||
for( i = 0; i < n; i++ )
|
||||
{
|
||||
test[i] = theRNG().uniform(0, 2) != 0;
|
||||
nz += (int)test[i];
|
||||
}
|
||||
ASSERT_EQ( nz, countNonZero(test) );
|
||||
ASSERT_FLOAT_EQ((float)nz/n, (float)(mean(test)[0]));
|
||||
}
|
||||
|
@ -58,18 +58,21 @@ static void mytest(cv::Ptr<cv::ConjGradSolver> solver,cv::Ptr<cv::MinProblemSolv
|
||||
std::cout<<"--------------------------\n";
|
||||
}
|
||||
|
||||
class SphereF:public cv::MinProblemSolver::Function{
|
||||
class SphereF_CG:public cv::MinProblemSolver::Function{
|
||||
public:
|
||||
int getDims() const { return 4; }
|
||||
double calc(const double* x)const{
|
||||
return x[0]*x[0]+x[1]*x[1]+x[2]*x[2]+x[3]*x[3];
|
||||
}
|
||||
void getGradient(const double* x,double* grad){
|
||||
// use automatically computed gradient
|
||||
/*void getGradient(const double* x,double* grad){
|
||||
for(int i=0;i<4;i++){
|
||||
grad[i]=2*x[i];
|
||||
}
|
||||
}
|
||||
}*/
|
||||
};
|
||||
class RosenbrockF:public cv::MinProblemSolver::Function{
|
||||
class RosenbrockF_CG:public cv::MinProblemSolver::Function{
|
||||
int getDims() const { return 2; }
|
||||
double calc(const double* x)const{
|
||||
return 100*(x[1]-x[0]*x[0])*(x[1]-x[0]*x[0])+(1-x[0])*(1-x[0]);
|
||||
}
|
||||
@ -79,11 +82,11 @@ class RosenbrockF:public cv::MinProblemSolver::Function{
|
||||
}
|
||||
};
|
||||
|
||||
TEST(DISABLED_Core_ConjGradSolver, regression_basic){
|
||||
TEST(Core_ConjGradSolver, regression_basic){
|
||||
cv::Ptr<cv::ConjGradSolver> solver=cv::ConjGradSolver::create();
|
||||
#if 1
|
||||
{
|
||||
cv::Ptr<cv::MinProblemSolver::Function> ptr_F(new SphereF());
|
||||
cv::Ptr<cv::MinProblemSolver::Function> ptr_F(new SphereF_CG());
|
||||
cv::Mat x=(cv::Mat_<double>(4,1)<<50.0,10.0,1.0,-10.0),
|
||||
etalon_x=(cv::Mat_<double>(1,4)<<0.0,0.0,0.0,0.0);
|
||||
double etalon_res=0.0;
|
||||
@ -92,7 +95,7 @@ TEST(DISABLED_Core_ConjGradSolver, regression_basic){
|
||||
#endif
|
||||
#if 1
|
||||
{
|
||||
cv::Ptr<cv::MinProblemSolver::Function> ptr_F(new RosenbrockF());
|
||||
cv::Ptr<cv::MinProblemSolver::Function> ptr_F(new RosenbrockF_CG());
|
||||
cv::Mat x=(cv::Mat_<double>(2,1)<<0.0,0.0),
|
||||
etalon_x=(cv::Mat_<double>(2,1)<<1.0,1.0);
|
||||
double etalon_res=0.0;
|
||||
|
@ -68,17 +68,19 @@ static void mytest(cv::Ptr<cv::DownhillSolver> solver,cv::Ptr<cv::MinProblemSolv
|
||||
|
||||
class SphereF:public cv::MinProblemSolver::Function{
|
||||
public:
|
||||
int getDims() const { return 2; }
|
||||
double calc(const double* x)const{
|
||||
return x[0]*x[0]+x[1]*x[1];
|
||||
}
|
||||
};
|
||||
class RosenbrockF:public cv::MinProblemSolver::Function{
|
||||
int getDims() const { return 2; }
|
||||
double calc(const double* x)const{
|
||||
return 100*(x[1]-x[0]*x[0])*(x[1]-x[0]*x[0])+(1-x[0])*(1-x[0]);
|
||||
}
|
||||
};
|
||||
|
||||
TEST(DISABLED_Core_DownhillSolver, regression_basic){
|
||||
TEST(Core_DownhillSolver, regression_basic){
|
||||
cv::Ptr<cv::DownhillSolver> solver=cv::DownhillSolver::create();
|
||||
#if 1
|
||||
{
|
||||
|
@ -866,3 +866,24 @@ protected:
|
||||
};
|
||||
|
||||
TEST(Core_DFT, complex_output) { Core_DFTComplexOutputTest test; test.safe_run(); }
|
||||
|
||||
TEST(Core_DFT, complex_output2)
|
||||
{
|
||||
for( int i = 0; i < 100; i++ )
|
||||
{
|
||||
int type = theRNG().uniform(0, 2) ? CV_64F : CV_32F;
|
||||
int m = theRNG().uniform(1, 10);
|
||||
int n = theRNG().uniform(1, 10);
|
||||
Mat x(m, n, type), out;
|
||||
randu(x, -1., 1.);
|
||||
dft(x, out, DFT_ROWS | DFT_COMPLEX_OUTPUT);
|
||||
double nrm = norm(out, NORM_INF);
|
||||
double thresh = n*m*2;
|
||||
if( nrm > thresh )
|
||||
{
|
||||
cout << "x: " << x << endl;
|
||||
cout << "out: " << out << endl;
|
||||
ASSERT_LT(nrm, thresh);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1209,3 +1209,42 @@ TEST(Core_Mat, copyNx1ToVector)
|
||||
|
||||
ASSERT_PRED_FORMAT2(cvtest::MatComparator(0, 0), ref_dst16, cv::Mat_<ushort>(dst16));
|
||||
}
|
||||
|
||||
TEST(Core_Matx, fromMat_)
|
||||
{
|
||||
Mat_<double> a = (Mat_<double>(2,2) << 10, 11, 12, 13);
|
||||
Matx22d b(a);
|
||||
ASSERT_EQ( norm(a, b, NORM_INF), 0.);
|
||||
}
|
||||
|
||||
TEST(Core_InputArray, empty)
|
||||
{
|
||||
vector<vector<Point> > data;
|
||||
ASSERT_TRUE( _InputArray(data).empty() );
|
||||
}
|
||||
|
||||
TEST(Core_CopyMask, bug1918)
|
||||
{
|
||||
Mat_<unsigned char> tmpSrc(100,100);
|
||||
tmpSrc = 124;
|
||||
Mat_<unsigned char> tmpMask(100,100);
|
||||
tmpMask = 255;
|
||||
Mat_<unsigned char> tmpDst(100,100);
|
||||
tmpDst = 2;
|
||||
tmpSrc.copyTo(tmpDst,tmpMask);
|
||||
ASSERT_EQ(sum(tmpDst)[0], 124*100*100);
|
||||
}
|
||||
|
||||
TEST(Core_SVD, orthogonality)
|
||||
{
|
||||
for( int i = 0; i < 2; i++ )
|
||||
{
|
||||
int type = i == 0 ? CV_32F : CV_64F;
|
||||
Mat mat_D(2, 2, type);
|
||||
mat_D.setTo(88.);
|
||||
Mat mat_U, mat_W;
|
||||
SVD::compute(mat_D, mat_W, mat_U, noArray(), SVD::FULL_UV);
|
||||
mat_U *= mat_U.t();
|
||||
ASSERT_LT(norm(mat_U, Mat::eye(2, 2, type), NORM_INF), 1e-5);
|
||||
}
|
||||
}
|
||||
|
@ -232,7 +232,7 @@ void Core_PowTest::prepare_to_validation( int /*test_case_idx*/ )
|
||||
for( j = 0; j < ncols; j++ )
|
||||
{
|
||||
int val = ((uchar*)a_data)[j];
|
||||
((uchar*)b_data)[j] = (uchar)(val <= 1 ? val :
|
||||
((uchar*)b_data)[j] = (uchar)(val == 0 ? 255 : val == 1 ? 1 :
|
||||
val == 2 && ipower == -1 ? 1 : 0);
|
||||
}
|
||||
else
|
||||
@ -247,17 +247,17 @@ void Core_PowTest::prepare_to_validation( int /*test_case_idx*/ )
|
||||
if( ipower < 0 )
|
||||
for( j = 0; j < ncols; j++ )
|
||||
{
|
||||
int val = ((char*)a_data)[j];
|
||||
((char*)b_data)[j] = (char)((val&~1)==0 ? val :
|
||||
int val = ((schar*)a_data)[j];
|
||||
((schar*)b_data)[j] = (schar)(val == 0 ? 127 : val == 1 ? 1 :
|
||||
val ==-1 ? 1-2*(ipower&1) :
|
||||
val == 2 && ipower == -1 ? 1 : 0);
|
||||
}
|
||||
else
|
||||
for( j = 0; j < ncols; j++ )
|
||||
{
|
||||
int val = ((char*)a_data)[j];
|
||||
int val = ((schar*)a_data)[j];
|
||||
val = ipow( val, ipower );
|
||||
((char*)b_data)[j] = saturate_cast<schar>(val);
|
||||
((schar*)b_data)[j] = saturate_cast<schar>(val);
|
||||
}
|
||||
break;
|
||||
case CV_16U:
|
||||
@ -265,7 +265,7 @@ void Core_PowTest::prepare_to_validation( int /*test_case_idx*/ )
|
||||
for( j = 0; j < ncols; j++ )
|
||||
{
|
||||
int val = ((ushort*)a_data)[j];
|
||||
((ushort*)b_data)[j] = (ushort)((val&~1)==0 ? val :
|
||||
((ushort*)b_data)[j] = (ushort)(val == 0 ? 65535 : val == 1 ? 1 :
|
||||
val ==-1 ? 1-2*(ipower&1) :
|
||||
val == 2 && ipower == -1 ? 1 : 0);
|
||||
}
|
||||
@ -282,7 +282,7 @@ void Core_PowTest::prepare_to_validation( int /*test_case_idx*/ )
|
||||
for( j = 0; j < ncols; j++ )
|
||||
{
|
||||
int val = ((short*)a_data)[j];
|
||||
((short*)b_data)[j] = (short)((val&~1)==0 ? val :
|
||||
((short*)b_data)[j] = (short)(val == 0 ? 32767 : val == 1 ? 1 :
|
||||
val ==-1 ? 1-2*(ipower&1) :
|
||||
val == 2 && ipower == -1 ? 1 : 0);
|
||||
}
|
||||
@ -299,7 +299,7 @@ void Core_PowTest::prepare_to_validation( int /*test_case_idx*/ )
|
||||
for( j = 0; j < ncols; j++ )
|
||||
{
|
||||
int val = ((int*)a_data)[j];
|
||||
((int*)b_data)[j] = (val&~1)==0 ? val :
|
||||
((int*)b_data)[j] = val == 0 ? INT_MAX : val == 1 ? 1 :
|
||||
val ==-1 ? 1-2*(ipower&1) :
|
||||
val == 2 && ipower == -1 ? 1 : 0;
|
||||
}
|
||||
@ -351,8 +351,6 @@ void Core_PowTest::prepare_to_validation( int /*test_case_idx*/ )
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////// matrix tests ////////////////////////////////////////////
|
||||
|
||||
class Core_MatrixTest : public cvtest::ArrayTest
|
||||
@ -2822,4 +2820,56 @@ TEST(CovariationMatrixVectorOfMatWithMean, accuracy)
|
||||
ASSERT_EQ(sDiff.dot(sDiff), 0.0);
|
||||
}
|
||||
|
||||
TEST(Core_Pow, special)
|
||||
{
|
||||
for( int i = 0; i < 100; i++ )
|
||||
{
|
||||
int n = theRNG().uniform(1, 30);
|
||||
Mat mtx0(1, n, CV_8S), mtx, result;
|
||||
randu(mtx0, -5, 5);
|
||||
|
||||
int type = theRNG().uniform(0, 2) ? CV_64F : CV_32F;
|
||||
double eps = type == CV_32F ? 1e-3 : 1e-10;
|
||||
mtx0.convertTo(mtx, type);
|
||||
// generate power from [-n, n] interval with 1/8 step - enough to check various cases.
|
||||
const int max_pf = 3;
|
||||
int pf = theRNG().uniform(0, max_pf*2+1);
|
||||
double power = ((1 << pf) - (1 << (max_pf*2-1)))/16.;
|
||||
int ipower = cvRound(power);
|
||||
bool is_ipower = ipower == power;
|
||||
cv::pow(mtx, power, result);
|
||||
for( int j = 0; j < n; j++ )
|
||||
{
|
||||
double val = type == CV_32F ? (double)mtx.at<float>(j) : mtx.at<double>(j);
|
||||
double r = type == CV_32F ? (double)result.at<float>(j) : result.at<double>(j);
|
||||
double r0;
|
||||
if( power == 0. )
|
||||
r0 = 1;
|
||||
else if( is_ipower )
|
||||
{
|
||||
r0 = 1;
|
||||
for( int k = 0; k < std::abs(ipower); k++ )
|
||||
r0 *= val;
|
||||
if( ipower < 0 )
|
||||
r0 = 1./r0;
|
||||
}
|
||||
else
|
||||
r0 = std::pow(val, power);
|
||||
if( cvIsInf(r0) )
|
||||
{
|
||||
ASSERT_TRUE(cvIsInf(r) != 0);
|
||||
}
|
||||
else if( cvIsNaN(r0) )
|
||||
{
|
||||
ASSERT_TRUE(cvIsNaN(r) != 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
ASSERT_TRUE(cvIsInf(r) == 0 && cvIsNaN(r) == 0);
|
||||
ASSERT_LT(fabs(r - r0), eps);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* End of file. */
|
||||
|
@ -322,6 +322,9 @@ OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_int16x8, vmulq_s16)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int32x4, vaddq_s32)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int32x4, vsubq_s32)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_int32x4, vmulq_s32)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_uint32x4, vaddq_u32)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_uint32x4, vsubq_u32)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_uint32x4, vmulq_u32)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_float32x4, vaddq_f32)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_float32x4, vsubq_f32)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_float32x4, vmulq_f32)
|
||||
|
@ -614,6 +614,16 @@ inline v_int32x4 operator * (const v_int32x4& a, const v_int32x4& b)
|
||||
__m128i d1 = _mm_unpackhi_epi32(c0, c1);
|
||||
return v_int32x4(_mm_unpacklo_epi64(d0, d1));
|
||||
}
|
||||
inline v_uint32x4& operator *= (v_uint32x4& a, const v_uint32x4& b)
|
||||
{
|
||||
a = a * b;
|
||||
return a;
|
||||
}
|
||||
inline v_int32x4& operator *= (v_int32x4& a, const v_int32x4& b)
|
||||
{
|
||||
a = a * b;
|
||||
return a;
|
||||
}
|
||||
|
||||
inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b,
|
||||
v_int32x4& c, v_int32x4& d)
|
||||
|
Loading…
x
Reference in New Issue
Block a user