Example Levenberg-Marquardt

From Efficient Java Matrix Library

Levenberg-Marquardt (LM) is a popular non-linear optimization algorithm. This example demonstrates how a basic implementation of Levenberg-Marquardt can be created using EJML's procedural interface. Unnecessary allocation of new memory is avoided by reshaping matrices. When a matrix is reshaped its width and height are changed, but new memory is not declared unless the new shape requires more memory than is already available.
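
As a quick illustration of that reshaping behaviour, here is a minimal sketch. The class name and the specific sizes are only for illustration; it shows when reshape() reuses the existing backing array and when it allocates a new one.

import org.ejml.data.DMatrixRMaj;

public class ReshapeDemo {
    public static void main( String[] args ) {
        DMatrixRMaj m = new DMatrixRMaj(1, 1); // backing array holds 1 element
        m.reshape(3, 4);                       // needs 12 elements -> a new array is allocated
        m.reshape(2, 2);                       // needs only 4 -> the 12-element array is reused
        System.out.println("shape = " + m.numRows + "x" + m.numCols + ", capacity = " + m.data.length);
    }
}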

LM works by being provided a function which computes the residual error. The residual is defined as the difference between the predicted output and the actual observed output, e.g. f(x) - y. Optimization then finds the set of parameters that minimizes the magnitude of the residuals, measured here as the mean of the squared residuals. A concrete ResidualFunction is shown in the usage sketch after the example code below.

Note: This is a simple, straightforward implementation of Levenberg-Marquardt and is not as robust as Minpack's implementation. If you are looking for a robust non-linear least-squares minimization library in Java, check out DDogleg.

External Resources:

* [https://github.com/lessthanoptimal/ejml/blob/v0.41/examples/src/org/ejml/example/LevenbergMarquardt.java LevenbergMarquardt.java code]

Example Code

/**
 * <p>
 * This is a straight forward implementation of the Levenberg-Marquardt (LM) algorithm. LM is used to minimize
 * non-linear cost functions:<br>
 * <br>
 * S(P) = Sum{ i=1:m , [y<sub>i</sub> - f(x<sub>i</sub>,P)]<sup>2</sup>}<br>
 * <br>
 * where P is the set of parameters being optimized.
 * </p>
 *
 * <p>
 * In each iteration the parameters are updated using the following equations:<br>
 * <br>
 * P<sub>i+1</sub> = P<sub>i</sub> - (H + &lambda; I)<sup>-1</sup> d <br>
 * d =  (1/N) Sum{ i=1..N , (f(x<sub>i</sub>;P<sub>i</sub>) - y<sub>i</sub>) * jacobian(:,i) } <br>
 * H =  (1/N) Sum{ i=1..N , jacobian(:,i) * jacobian(:,i)<sup>T</sup> }
 * </p>
 * <p>
 * Whenever possible the allocation of new memory is avoided.  This is accomplished by reshaping matrices.
 * A matrix that is reshaped won't grow unless the new shape requires more memory than it has available.
 * </p>
 *
 * @author Peter Abeles
 */
public class LevenbergMarquardt {
    // Convergence criteria
    private int maxIterations = 100;
    private double ftol = 1e-12;
    private double gtol = 1e-12;

    // how much the numerical jacobian calculation perturbs the parameters by.
    // In better implementations there are more principled ways to compute this delta. See Numerical Recipes.
    private final static double DELTA = 1e-8;

    // Dampening. Larger values mean it behaves more like gradient descent
    private double initialLambda;

    // the function that is optimized
    private ResidualFunction function;

    // the optimized parameters and associated costs
    private DMatrixRMaj candidateParameters = new DMatrixRMaj(1, 1);
    private double initialCost;
    private double finalCost;

    // used by matrix operations
    private DMatrixRMaj g = new DMatrixRMaj(1, 1);            // gradient
    private DMatrixRMaj H = new DMatrixRMaj(1, 1);            // Hessian approximation
    private DMatrixRMaj Hdiag = new DMatrixRMaj(1, 1);
    private DMatrixRMaj negativeStep = new DMatrixRMaj(1, 1);

    // variables used by the numerical jacobian algorithm
    private DMatrixRMaj temp0 = new DMatrixRMaj(1, 1);
    private DMatrixRMaj temp1 = new DMatrixRMaj(1, 1);
    // used when computing d and H variables
    private DMatrixRMaj residuals = new DMatrixRMaj(1, 1);

    // Where the numerical Jacobian is stored.
    private DMatrixRMaj jacobian = new DMatrixRMaj(1, 1);

    public double getInitialCost() {
        return initialCost;
    }

    public double getFinalCost() {
        return finalCost;
    }

    /**
     * @param initialLambda Initial value of dampening parameter. Try 1 to start
     */
    public LevenbergMarquardt( double initialLambda ) {
        this.initialLambda = initialLambda;
    }

    /**
     * Specifies convergence criteria
     *
     * @param maxIterations Maximum number of iterations
     * @param ftol convergence based on change in function value. Try 1e-12
     * @param gtol convergence based on residual magnitude. Try 1e-12
     */
    public void setConvergence( int maxIterations, double ftol, double gtol ) {
        this.maxIterations = maxIterations;
        this.ftol = ftol;
        this.gtol = gtol;
    }

    /**
     * Finds the best fit parameters.
     *
     * @param function The function being optimized
     * @param parameters (Input/Output) initial parameter estimate and storage for optimized parameters
     * @return true if it succeeded and false if it did not.
     */
    public boolean optimize( ResidualFunction function, DMatrixRMaj parameters ) {
        configure(function, parameters.getNumElements());

        // save the cost of the initial parameters so that it knows if it improves or not
        double previousCost = initialCost = cost(parameters);

        // iterate until the difference between the costs is insignificant
        double lambda = initialLambda;

        // if it should recompute the Jacobian in this iteration or not
        boolean computeHessian = true;

        for (int iter = 0; iter < maxIterations; iter++) {
            if (computeHessian) {
                // compute some variables based on the gradient
                computeGradientAndHessian(parameters);
                computeHessian = false;

                // check for convergence using gradient test
                boolean converged = true;
                for (int i = 0; i < g.getNumElements(); i++) {
                    if (Math.abs(g.data[i]) > gtol) {
                        converged = false;
                        break;
                    }
                }
                if (converged) {
                    // record the cost of the accepted parameters before returning
                    finalCost = previousCost;
                    return true;
                }
            }

            // H = H + lambda*I
            for (int i = 0; i < H.numRows; i++) {
                H.set(i, i, Hdiag.get(i) + lambda);
            }

            // In robust implementations failure to solve is handled much better
            if (!CommonOps_DDRM.solve(H, g, negativeStep)) {
                return false;
            }

            // compute the candidate parameters
            CommonOps_DDRM.subtract(parameters, negativeStep, candidateParameters);

            double cost = cost(candidateParameters);
            if (cost <= previousCost) {
                // the candidate parameters produced better results so use it
                computeHessian = true;
                parameters.setTo(candidateParameters);

                // check for convergence
                // ftol <= (cost(k) - cost(k+1))/cost(k)
                boolean converged = ftol*previousCost >= previousCost - cost;

                previousCost = cost;
                lambda /= 10.0;

                if (converged) {
                    // record the cost of the accepted parameters before returning
                    finalCost = previousCost;
                    return true;
                }
            } else {
                lambda *= 10.0;
            }
        }
        finalCost = previousCost;
        return true;
    }

    /**
     * Performs sanity checks on the input data and reshapes internal matrices.  By reshaping
     * a matrix it will only declare new memory when needed.
     */
    protected void configure( ResidualFunction function, int numParam ) {
        this.function = function;
        int numFunctions = function.numFunctions();

        // reshaping a matrix means that new memory is only declared when needed
        candidateParameters.reshape(numParam, 1);
        g.reshape(numParam, 1);
        H.reshape(numParam, numParam);
        negativeStep.reshape(numParam, 1);

        // Normally these variables are thought of as row vectors, but it works out easier if they are column
        temp0.reshape(numFunctions, 1);
        temp1.reshape(numFunctions, 1);
        residuals.reshape(numFunctions, 1);
        jacobian.reshape(numFunctions, numParam);
    }

    /**
     * Computes the d and H parameters.
     *
     * d = J'*(f(x)-y)    <--- that's also the gradient
     * H = J'*J
     */
    private void computeGradientAndHessian( DMatrixRMaj param ) {
        // residuals = f(x) - y
        function.compute(param, residuals);

        computeNumericalJacobian(param, jacobian);

        CommonOps_DDRM.multTransA(jacobian, residuals, g);
        CommonOps_DDRM.multTransA(jacobian, jacobian, H);

        CommonOps_DDRM.extractDiag(H, Hdiag);
    }

    /**
     * Computes the "cost" for the parameters given.
     *
     * cost = (1/N) Sum (f(x) - y)^2
     */
    private double cost( DMatrixRMaj param ) {
        function.compute(param, residuals);

        double error = NormOps_DDRM.normF(residuals);

        return error*error/(double)residuals.numRows;
    }

    /**
     * Computes a simple numerical Jacobian.
     *
     * @param param (input) The set of parameters that the Jacobian is to be computed at.
     * @param jacobian (output) Where the jacobian will be stored
     */
    protected void computeNumericalJacobian( DMatrixRMaj param,
                                             DMatrixRMaj jacobian ) {
        double invDelta = 1.0/DELTA;

        function.compute(param, temp0);

        // compute the jacobian by perturbing the parameters slightly
        // then seeing how it affects the results.
        for (int i = 0; i < param.getNumElements(); i++) {
            param.data[i] += DELTA;
            function.compute(param, temp1);
            // compute the difference between the two parameters and divide by the delta
            // temp1 = (temp1 - temp0)/delta
            CommonOps_DDRM.add(invDelta, temp1, -invDelta, temp0, temp1);

            // copy the results into the jacobian matrix
            // J(i,:) = temp1
            CommonOps_DDRM.insert(temp1, jacobian, 0, i);

            param.data[i] -= DELTA;
        }
    }

    /**
     * The function that is being optimized. Returns the residual, f(x) - y.
     */
    public interface ResidualFunction {
        /**
         * Computes the residual vector given the set of input parameters.
         * The function maps N inputs to M outputs.
         *
         * @param param (Input) N by 1 parameter vector
         * @param residual (Output) M by 1 output vector to store the residual = f(x)-y
         */
        void compute( DMatrixRMaj param, DMatrixRMaj residual );

        /**
         * Number of functions in output
         *
         * @return function count
         */
        int numFunctions();
    }
}
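
To show how the class above is driven, here is a minimal usage sketch. It fits a line y = a + b*x to a handful of points. The data values, the anonymous ResidualFunction, the initial guess, and the class name are hypothetical and only illustrate the calling pattern; they are not part of the library.

import org.ejml.data.DMatrixRMaj;

// assumes this file lives alongside LevenbergMarquardt (e.g. in the same package)
public class LevenbergMarquardtExample {
    public static void main( String[] args ) {
        // hypothetical sample data, roughly y = 2 + 3*x with a little noise
        final double[] x = {0, 1, 2, 3, 4};
        final double[] y = {2.1, 4.9, 8.2, 10.8, 14.1};

        // residual_i = f(x_i; param) - y_i for the line model f(x) = a + b*x
        LevenbergMarquardt.ResidualFunction lineResidual = new LevenbergMarquardt.ResidualFunction() {
            @Override
            public void compute( DMatrixRMaj param, DMatrixRMaj residual ) {
                double a = param.get(0);
                double b = param.get(1);
                for (int i = 0; i < x.length; i++) {
                    residual.set(i, 0, (a + b*x[i]) - y[i]);
                }
            }

            @Override
            public int numFunctions() {
                return x.length; // one residual per data point
            }
        };

        // initial guess for (a, b); optimize() overwrites it with the solution
        DMatrixRMaj parameters = new DMatrixRMaj(2, 1, true, 0, 0);

        LevenbergMarquardt lm = new LevenbergMarquardt(1);
        if (lm.optimize(lineResidual, parameters)) {
            System.out.println("a = " + parameters.get(0) + "  b = " + parameters.get(1));
            System.out.println("initial cost = " + lm.getInitialCost() + "  final cost = " + lm.getFinalCost());
        } else {
            System.out.println("optimization failed");
        }
    }
}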