Finish implementing the Nonlinear Conjugate Gradient
Now everything is prepared for the pull request.
parent 581d454536
commit 891bcd8491
@@ -1,11 +0,0 @@
-Conjugate Gradient
-=======================
-
-.. highlight:: cpp
-
-optim::ConjGradSolver
----------------------------------
-
-.. ocv:class:: optim::ConjGradSolver
-
-This class is used
@@ -8,14 +8,15 @@ optim::DownhillSolver

 .. ocv:class:: optim::DownhillSolver

-This class is used to perform the non-linear non-constrained *minimization* of a function, given on an *n*-dimensional Euclidean space,
+This class is used to perform the non-linear non-constrained *minimization* of a function, defined on an *n*-dimensional Euclidean space,
 using the **Nelder-Mead method**, also known as **downhill simplex method**. The basic idea about the method can be obtained from
 (`http://en.wikipedia.org/wiki/Nelder-Mead\_method <http://en.wikipedia.org/wiki/Nelder-Mead_method>`_). It should be noted, that
 this method, although deterministic, is rather a heuristic and therefore may converge to a local minima, not necessary a global one.
 It is iterative optimization technique, which at each step uses an information about the values of a function evaluated only at
 *n+1* points, arranged as a *simplex* in *n*-dimensional space (hence the second name of the method). At each step new point is
 chosen to evaluate function at, obtained value is compared with previous ones and based on this information simplex changes it's shape
-, slowly moving to the local minimum.
+, slowly moving to the local minimum. Thus this method is using *only* function values to make decision, on contrary to, say, Nonlinear
+Conjugate Gradient method (which is also implemented in ``optim``).

 Algorithm stops when the number of function evaluations done exceeds ``termcrit.maxCount``, when the function values at the
 vertices of simplex are within ``termcrit.epsilon`` range or simplex becomes so small that it
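Note (not part of the diff): the paragraph above says the simplex "changes its shape" using only function values. A minimal schematic sketch of a single reflection step may make that concrete; it illustrates the general Nelder-Mead idea only and is not the module's ``DownhillSolver`` implementation.

    // Schematic only: one reflection step of a 2-D simplex for f(x) = x0^2 + x1^2.
    #include <cstdio>

    static double f(const double* x) { return x[0]*x[0] + x[1]*x[1]; }

    int main()
    {
        // n+1 = 3 vertices of the simplex, ordered here so the worst (largest f) is last
        double s[3][2] = { {1.0, 1.0}, {1.0, 1.5}, {1.5, 1.5} };
        // centroid of the two best vertices
        double c[2] = { (s[0][0] + s[1][0])/2.0, (s[0][1] + s[1][1])/2.0 };
        // reflect the worst vertex through the centroid: xr = c + (c - worst)
        double xr[2] = { 2.0*c[0] - s[2][0], 2.0*c[1] - s[2][1] };
        if(f(xr) < f(s[2])) { s[2][0] = xr[0]; s[2][1] = xr[1]; }   // accept if it improves
        std::printf("replaced vertex: (%g, %g), f = %g\n", s[2][0], s[2][1], f(s[2]));
        return 0;
    }

The full method also has expansion, contraction and shrink moves (not shown), chosen the same way: by comparing function values only, never gradients.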
@@ -30,9 +31,9 @@ positive integer ``termcrit.maxCount`` and positive non-integer ``termcrit.epsilon``
     class CV_EXPORTS Function
     {
     public:
         virtual ~Function() {}
-        //! ndim - dimensionality
         virtual double calc(const double* x) const = 0;
+        virtual void getGradient(const double* /*x*/,double* /*grad*/) {}
     };

     virtual Ptr<Function> getFunction() const = 0;
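Note (not part of the diff): the hunk above adds an optional ``getGradient()`` to ``Solver::Function`` with an empty default body, so gradient-free solvers keep working while ``ConjGradSolver`` can ask for derivatives. A user-side subclass follows the same pattern as the ``SphereF``/``RosenbrockF`` fixtures in the test hunk further down; the class name below is made up for illustration, and the include path is the module header assumed for this branch.

    #include <opencv2/optim.hpp>   // assumed public header of the optim module

    // Illustrative subclass: calc() returns f(x), getGradient() writes df/dx_i into grad.
    class MyQuadratic : public cv::optim::Solver::Function
    {
    public:
        double calc(const double* x) const
        {
            return (x[0] - 3.0)*(x[0] - 3.0) + x[1]*x[1];   // minimum at (3, 0)
        }
        void getGradient(const double* x, double* grad)
        {
            grad[0] = 2.0*(x[0] - 3.0);
            grad[1] = 2.0*x[1];
        }
    };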
@@ -150,7 +151,7 @@ optim::createDownhillSolver
 This function returns the reference to the ready-to-use ``DownhillSolver`` object. All the parameters are optional, so this procedure can be called
 even without parameters at all. In this case, the default values will be used. As default value for terminal criteria are the only sensible ones,
 ``DownhillSolver::setFunction()`` and ``DownhillSolver::setInitStep()`` should be called upon the obtained object, if the respective parameters
-were not given to ``createDownhillSolver()``. Otherwise, the two ways (give parameters to ``createDownhillSolver()`` or miss the out and call the
+were not given to ``createDownhillSolver()``. Otherwise, the two ways (give parameters to ``createDownhillSolver()`` or miss them out and call the
 ``DownhillSolver::setFunction()`` and ``DownhillSolver::setInitStep()``) are absolutely equivalent (and will drop the same errors in the same way,
 should invalid input be detected).

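Note (not part of the diff): as the paragraph above explains, the solver can be created empty and configured through setters afterwards. A minimal usage sketch of that second style, assuming the ``opencv2/optim.hpp`` header and reusing the illustrative ``MyQuadratic`` class from the earlier note (exact setter signatures may differ slightly in the branch):

    #include <opencv2/optim.hpp>   // assumed public header of the optim module

    void downhill_example()
    {
        cv::Ptr<cv::optim::DownhillSolver> solver = cv::optim::createDownhillSolver();
        solver->setFunction(cv::Ptr<cv::optim::Solver::Function>(new MyQuadratic()));
        cv::Mat step = (cv::Mat_<double>(1, 2) << 0.5, 0.5);   // initial simplex step per coordinate
        solver->setInitStep(step);
        cv::Mat x = (cv::Mat_<double>(1, 2) << 0.0, 0.0);      // start point, refined in place
        double fmin = solver->minimize(x);                      // returns f at the found point
        (void)fmin;
    }

The equivalent one-call form would pass the function, the initial step and the termination criteria directly to ``createDownhillSolver()``.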
@@ -10,4 +10,4 @@ optim. Generic numerical optimization
     linear_programming
     downhill_simplex_method
     primal_dual_algorithm
-    conjugate_gradient
+    nonlinear_conjugate_gradient
@@ -1,8 +1,11 @@
 #include "precomp.hpp"
+#undef ALEX_DEBUG
 #include "debug.hpp"

 namespace cv{namespace optim{

+#define SEC_METHOD_ITERATIONS 4
+#define INITIAL_SEC_METHOD_SIGMA 0.1
 class ConjGradSolverImpl : public ConjGradSolver
 {
 public:
@@ -16,9 +19,45 @@ namespace cv{namespace optim{
     Ptr<Solver::Function> _Function;
     TermCriteria _termcrit;
     Mat_<double> d,r,buf_x,r_old;
+    Mat_<double> minimizeOnTheLine_buf1,minimizeOnTheLine_buf2;
 private:
+    static void minimizeOnTheLine(Ptr<Solver::Function> _f,Mat_<double>& x,const Mat_<double>& d,Mat_<double>& buf1,Mat_<double>& buf2);
 };

+void ConjGradSolverImpl::minimizeOnTheLine(Ptr<Solver::Function> _f,Mat_<double>& x,const Mat_<double>& d,Mat_<double>& buf1,
+        Mat_<double>& buf2){
+    double sigma=INITIAL_SEC_METHOD_SIGMA;
+    buf1=0.0;
+    buf2=0.0;
+
+    dprintf(("before minimizeOnTheLine\n"));
+    dprintf(("x:\n"));
+    print_matrix(x);
+    dprintf(("d:\n"));
+    print_matrix(d);
+
+    for(int i=0;i<SEC_METHOD_ITERATIONS;i++){
+        _f->getGradient((double*)x.data,(double*)buf1.data);
+        dprintf(("buf1:\n"));
+        print_matrix(buf1);
+        x=x+sigma*d;
+        _f->getGradient((double*)x.data,(double*)buf2.data);
+        dprintf(("buf2:\n"));
+        print_matrix(buf2);
+        double d1=buf1.dot(d), d2=buf2.dot(d);
+        if((d1-d2)==0){
+            break;
+        }
+        double alpha=-sigma*d1/(d2-d1);
+        dprintf(("(buf2.dot(d)-buf1.dot(d))=%f\nalpha=%f\n",(buf2.dot(d)-buf1.dot(d)),alpha));
+        x=x+(alpha-sigma)*d;
+        sigma=-alpha;
+    }
+
+    dprintf(("after minimizeOnTheLine\n"));
+    print_matrix(x);
+}
+
 double ConjGradSolverImpl::minimize(InputOutputArray x){
     CV_Assert(_Function.empty()==false);
     dprintf(("termcrit:\n\ttype: %d\n\tmaxCount: %d\n\tEPS: %g\n",_termcrit.type,_termcrit.maxCount,_termcrit.epsilon));
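Note (not part of the diff): ``minimizeOnTheLine()`` above runs a fixed number of secant iterations (``SEC_METHOD_ITERATIONS``) on the directional derivative g(a) = grad f(x + a*d) . d. With d1 = g(0) and d2 = g(sigma), the secant zero is a = -sigma*d1/(d2 - d1); since x has already been moved to x + sigma*d, the code advances by (a - sigma)*d, i.e. by a*d from the old point, and reuses -a as the next trial step. A self-contained sketch of the same recurrence in one dimension (plain C++, not the module's code):

    // f(t) = (t - 2)^2 along a line; g(t) = f'(t) = 2*(t - 2).
    #include <cstdio>

    int main()
    {
        double t = 0.0;        // current point on the line
        double sigma = 0.1;    // trial step, cf. INITIAL_SEC_METHOD_SIGMA
        for(int i = 0; i < 4; i++)              // cf. SEC_METHOD_ITERATIONS
        {
            double d1 = 2.0*(t - 2.0);          // derivative at t
            t += sigma;                          // probe point t + sigma
            double d2 = 2.0*(t - 2.0);          // derivative at the probe
            if(d1 - d2 == 0.0) break;            // flat segment, nothing to do
            double alpha = -sigma*d1/(d2 - d1);  // secant zero of the derivative
            t += (alpha - sigma);                // net move from the old t is alpha
            sigma = -alpha;                      // next trial step, as in the code above
        }
        std::printf("t = %f\n", t);              // exact (t = 2) after one step for a quadratic
        return 0;
    }

Because the directional derivative of a quadratic is affine, one secant step lands on the exact minimizer; for general functions the four iterations only approximate it.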
@@ -28,9 +67,13 @@ namespace cv{namespace optim{
     int ndim=MAX(x_mat.rows,x_mat.cols);
     CV_Assert(x_mat.type()==CV_64FC1);

-    d.create(1,ndim);
-    r.create(1,ndim);
-    r_old.create(1,ndim);
+    if(d.cols!=ndim){
+        d.create(1,ndim);
+        r.create(1,ndim);
+        r_old.create(1,ndim);
+        minimizeOnTheLine_buf1.create(1,ndim);
+        minimizeOnTheLine_buf2.create(1,ndim);
+    }

     Mat_<double> proxy_x;
     if(x_mat.rows>1){
@@ -41,14 +84,40 @@ namespace cv{namespace optim{
     }else{
         proxy_x=x_mat;
     }
+    _Function->getGradient((double*)proxy_x.data,(double*)d.data);
+    if(true){
+        d*=-1.0;
+        d.copyTo(r);
+    }else{((double*)d.data)[1]=42.0;}
+
     //here everything goes. check that everything is setted properly
+    dprintf(("proxy_x\n"));print_matrix(proxy_x);
+    dprintf(("d first time\n"));print_matrix(d);
+    dprintf(("r\n"));print_matrix(r);
+
+    double beta=0;
+    for(int count=0;count<_termcrit.maxCount;count++){
+        minimizeOnTheLine(_Function,proxy_x,d,minimizeOnTheLine_buf1,minimizeOnTheLine_buf2);
+        r.copyTo(r_old);
+        _Function->getGradient((double*)proxy_x.data,(double*)r.data);
+        r*=-1.0;
+        double r_norm_sq=norm(r);
+        if(_termcrit.type==(TermCriteria::MAX_ITER+TermCriteria::EPS) && r_norm_sq<_termcrit.epsilon){
+            break;
+        }
+        r_norm_sq=r_norm_sq*r_norm_sq;
+        beta=MAX(0.0,(r_norm_sq-r.dot(r_old))/r_norm_sq);
+        d=r+beta*d;
+    }
+
+
     if(x_mat.rows>1){
         Mat(ndim, 1, CV_64F, (double*)proxy_x.data).copyTo(x);
     }
-    return 0.0;
+    return _Function->calc((double*)proxy_x.data);
 }

 ConjGradSolverImpl::ConjGradSolverImpl(){
     _Function=Ptr<Function>();
 }
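Note (not part of the diff): the loop above alternates the secant line search along ``d`` with a conjugate-direction update. Writing r for the negated gradient at the new point and r_old for the previous one, it computes beta = max(0, (r.r - r.r_old)/(r.r)) and sets d = r + beta*d, a Polak-Ribiere-style rule with the usual max(0, .) safeguard (the textbook Polak-Ribiere denominator is the old residual's squared norm; here the new one is used). Two small review remarks: the ``if(true){...}else{((double*)d.data)[1]=42.0;}`` block looks like leftover debug scaffolding, and at the point of the ``epsilon`` test the variable ``r_norm_sq`` still holds ``norm(r)``, not its square (it is squared only afterwards for the beta computation). A standalone sketch mirroring the same bookkeeping on f(x) = x0^2 + 10*x1^2, with the same secant line search (plain C++, not the module's code):

    #include <cstdio>
    #include <algorithm>

    static void grad(const double x[2], double g[2]) { g[0] = 2.0*x[0]; g[1] = 20.0*x[1]; }
    static double dot(const double a[2], const double b[2]) { return a[0]*b[0] + a[1]*b[1]; }

    // same secant recurrence as minimizeOnTheLine(), on plain arrays
    static void line_search(double x[2], const double d[2])
    {
        double sigma = 0.1, g1[2], g2[2];
        for(int i = 0; i < 4; i++)
        {
            grad(x, g1);
            x[0] += sigma*d[0]; x[1] += sigma*d[1];
            grad(x, g2);
            double d1 = dot(g1, d), d2 = dot(g2, d);
            if(d1 - d2 == 0.0) break;
            double alpha = -sigma*d1/(d2 - d1);
            x[0] += (alpha - sigma)*d[0]; x[1] += (alpha - sigma)*d[1];
            sigma = -alpha;
        }
    }

    int main()
    {
        double x[2] = {5.0, -3.0}, r[2], r_old[2], d[2];
        grad(x, r); r[0] = -r[0]; r[1] = -r[1];     // r = -gradient, first direction d = r
        d[0] = r[0]; d[1] = r[1];
        for(int count = 0; count < 50; count++)
        {
            line_search(x, d);
            r_old[0] = r[0]; r_old[1] = r[1];
            grad(x, r); r[0] = -r[0]; r[1] = -r[1];
            double r_norm_sq = dot(r, r);
            if(r_norm_sq < 1e-16) break;            // rough analogue of the epsilon test
            double beta = std::max(0.0, (r_norm_sq - dot(r, r_old))/r_norm_sq);
            d[0] = r[0] + beta*d[0]; d[1] = r[1] + beta*d[1];
        }
        std::printf("x = (%g, %g)\n", x[0], x[1]);  // converges towards (0, 0)
        return 0;
    }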
@@ -74,4 +143,3 @@ namespace cv{namespace optim{
     return Ptr<ConjGradSolver>(CG);
 }
 }}
-
@@ -24,38 +24,39 @@ public:
         return x[0]*x[0]+x[1]*x[1]+x[2]*x[2]+x[3]*x[3];
     }
     void getGradient(const double* x,double* grad){
-        for(int i=0;i<4;i++,grad++,x++){
-            grad[0]=2*x[0];
+        for(int i=0;i<4;i++){
+            grad[i]=2*x[i];
         }
     }
 };
-//TODO: test transp/usual x
-/*class RosenbrockF:public cv::optim::Solver::Function{
+class RosenbrockF:public cv::optim::Solver::Function{
     double calc(const double* x)const{
         return 100*(x[1]-x[0]*x[0])*(x[1]-x[0]*x[0])+(1-x[0])*(1-x[0]);
     }
-};*/
+    void getGradient(const double* x,double* grad){
+        grad[0]=-2*(1-x[0])-400*(x[1]-x[0]*x[0])*x[0];
+        grad[1]=200*(x[1]-x[0]*x[0]);
+    }
+};

 TEST(Optim_ConjGrad, regression_basic){
     cv::Ptr<cv::optim::ConjGradSolver> solver=cv::optim::createConjGradSolver();
 #if 1
     {
         cv::Ptr<cv::optim::Solver::Function> ptr_F(new SphereF());
-        cv::Mat x=(cv::Mat_<double>(1,2)<<1.0,1.0),
-            etalon_x=(cv::Mat_<double>(1,2)<<0.0,0.0);
+        cv::Mat x=(cv::Mat_<double>(4,1)<<50.0,10.0,1.0,-10.0),
+            etalon_x=(cv::Mat_<double>(1,4)<<0.0,0.0,0.0,0.0);
         double etalon_res=0.0;
-        return;
         mytest(solver,ptr_F,x,etalon_x,etalon_res);
     }
 #endif
-#if 0
+#if 1
     {
         cv::Ptr<cv::optim::Solver::Function> ptr_F(new RosenbrockF());
         cv::Mat x=(cv::Mat_<double>(2,1)<<0.0,0.0),
-            step=(cv::Mat_<double>(2,1)<<0.5,+0.5),
             etalon_x=(cv::Mat_<double>(2,1)<<1.0,1.0);
         double etalon_res=0.0;
-        mytest(solver,ptr_F,x,step,etalon_x,etalon_res);
+        mytest(solver,ptr_F,x,etalon_x,etalon_res);
     }
 #endif
 }
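Note (not part of the diff): the re-enabled ``RosenbrockF`` hard-codes the partial derivatives of f(x0, x1) = 100*(x1 - x0^2)^2 + (1 - x0)^2, namely df/dx0 = -400*x0*(x1 - x0^2) - 2*(1 - x0) and df/dx1 = 200*(x1 - x0^2), which matches the code above. A standalone central-difference check of such hand-written gradients (not part of the test suite) could catch typos early:

    #include <cstdio>
    #include <cmath>

    static double f(const double* x)
    {
        return 100*(x[1]-x[0]*x[0])*(x[1]-x[0]*x[0])+(1-x[0])*(1-x[0]);
    }
    static void analytic_grad(const double* x, double* grad)
    {
        grad[0]=-2*(1-x[0])-400*(x[1]-x[0]*x[0])*x[0];
        grad[1]=200*(x[1]-x[0]*x[0]);
    }

    int main()
    {
        const double h = 1e-6;
        double x[2] = {0.3, -0.7}, g[2];
        analytic_grad(x, g);
        for(int i = 0; i < 2; i++)
        {
            double xp[2] = {x[0], x[1]}, xm[2] = {x[0], x[1]};
            xp[i] += h; xm[i] -= h;
            double fd = (f(xp) - f(xm))/(2*h);   // central finite difference
            std::printf("grad[%d]: analytic=%f numeric=%f diff=%g\n",
                        i, g[i], fd, std::fabs(g[i] - fd));
        }
        return 0;
    }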