Difference between revisions of "Example Levenberg-Marquardt"

From Efficient Java Matrix Library
Jump to navigation Jump to search
Line 6: Line 6:
  
 
External Resources:
 
External Resources:
* [https://github.com/lessthanoptimal/ejml/blob/v0.27/examples/src/org/ejml/example/LevenbergMarquardt.java LevenbergMarquardt.java code]
+
* [https://github.com/lessthanoptimal/ejml/blob/v0.31/examples/src/org/ejml/example/LevenbergMarquardt.java LevenbergMarquardt.java code]
 
* <disqus>Discuss this example</disqus>
 
* <disqus>Discuss this example</disqus>
  
Line 46: Line 46:
  
 
     // the optimized parameters and associated costs
 
     // the optimized parameters and associated costs
     private DenseMatrix64F param;
+
     private DMatrixRMaj param;
 
     private double initialCost;
 
     private double initialCost;
 
     private double finalCost;
 
     private double finalCost;
  
 
     // used by matrix operations
 
     // used by matrix operations
     private DenseMatrix64F d;
+
     private DMatrixRMaj d;
     private DenseMatrix64F H;
+
     private DMatrixRMaj H;
     private DenseMatrix64F negDelta;
+
     private DMatrixRMaj negDelta;
     private DenseMatrix64F tempParam;
+
     private DMatrixRMaj tempParam;
     private DenseMatrix64F A;
+
     private DMatrixRMaj A;
  
 
     // variables used by the numerical jacobian algorithm
 
     // variables used by the numerical jacobian algorithm
     private DenseMatrix64F temp0;
+
     private DMatrixRMaj temp0;
     private DenseMatrix64F temp1;
+
     private DMatrixRMaj temp1;
 
     // used when computing d and H variables
 
     // used when computing d and H variables
     private DenseMatrix64F tempDH;
+
     private DMatrixRMaj tempDH;
  
 
     // Where the numerical Jacobian is stored.
 
     // Where the numerical Jacobian is stored.
     private DenseMatrix64F jacobian;
+
     private DMatrixRMaj jacobian;
  
 
     /**
 
     /**
Line 79: Line 79:
 
         int numParam = 1;
 
         int numParam = 1;
  
         this.temp0 = new DenseMatrix64F(maxElements,1);
+
         this.temp0 = new DMatrixRMaj(maxElements,1);
         this.temp1 = new DenseMatrix64F(maxElements,1);
+
         this.temp1 = new DMatrixRMaj(maxElements,1);
         this.tempDH = new DenseMatrix64F(maxElements,1);
+
         this.tempDH = new DMatrixRMaj(maxElements,1);
         this.jacobian = new DenseMatrix64F(numParam,maxElements);
+
         this.jacobian = new DMatrixRMaj(numParam,maxElements);
  
 
         this.func = funcCost;
 
         this.func = funcCost;
  
         this.param = new DenseMatrix64F(numParam,1);
+
         this.param = new DMatrixRMaj(numParam,1);
         this.d = new DenseMatrix64F(numParam,1);
+
         this.d = new DMatrixRMaj(numParam,1);
         this.H = new DenseMatrix64F(numParam,numParam);
+
         this.H = new DMatrixRMaj(numParam,numParam);
         this.negDelta = new DenseMatrix64F(numParam,1);
+
         this.negDelta = new DMatrixRMaj(numParam,1);
         this.tempParam = new DenseMatrix64F(numParam,1);
+
         this.tempParam = new DMatrixRMaj(numParam,1);
         this.A = new DenseMatrix64F(numParam,numParam);
+
         this.A = new DMatrixRMaj(numParam,numParam);
 
     }
 
     }
  
Line 103: Line 103:
 
     }
 
     }
  
     public DenseMatrix64F getParameters() {
+
     public DMatrixRMaj getParameters() {
 
         return param;
 
         return param;
 
     }
 
     }
Line 115: Line 115:
 
     * @return true if it succeeded and false if it did not.
 
     * @return true if it succeeded and false if it did not.
 
     */
 
     */
     public boolean optimize( DenseMatrix64F initParam ,
+
     public boolean optimize( DMatrixRMaj initParam ,
                             DenseMatrix64F X ,
+
                             DMatrixRMaj X ,
                             DenseMatrix64F Y )
+
                             DMatrixRMaj Y )
 
     {
 
     {
 
         configure(initParam,X,Y);
 
         configure(initParam,X,Y);
Line 138: Line 138:
 
     * or it iterates too many times
 
     * or it iterates too many times
 
     */
 
     */
     private boolean adjustParam(DenseMatrix64F X, DenseMatrix64F Y,
+
     private boolean adjustParam(DMatrixRMaj X, DMatrixRMaj Y,
 
                                 double prevCost) {
 
                                 double prevCost) {
 
         // lambda adjusts how big of a step it takes
 
         // lambda adjusts how big of a step it takes
Line 186: Line 186:
 
     * a matrix it will only declare new memory when needed.
 
     * a matrix it will only declare new memory when needed.
 
     */
 
     */
     protected void configure( DenseMatrix64F initParam , DenseMatrix64F X , DenseMatrix64F Y )
+
     protected void configure(DMatrixRMaj initParam , DMatrixRMaj X , DMatrixRMaj Y )
 
     {
 
     {
 
         if( Y.getNumRows() != X.getNumRows() ) {
 
         if( Y.getNumRows() != X.getNumRows() ) {
Line 222: Line 222:
 
     * H is an approximation of the hessian.
 
     * H is an approximation of the hessian.
 
     */
 
     */
     private void computeDandH( DenseMatrix64F param , DenseMatrix64F x , DenseMatrix64F y )
+
     private void computeDandH(DMatrixRMaj param , DMatrixRMaj x , DMatrixRMaj y )
 
     {
 
     {
 
         func.compute(param,x, tempDH);
 
         func.compute(param,x, tempDH);
Line 251: Line 251:
 
     * where I is an identity matrix.
 
     * where I is an identity matrix.
 
     */
 
     */
     private void computeA( DenseMatrix64F A , DenseMatrix64F H , double lambda )
+
     private void computeA(DMatrixRMaj A , DMatrixRMaj H , double lambda )
 
     {
 
     {
 
         final int numParam = param.getNumElements();
 
         final int numParam = param.getNumElements();
Line 266: Line 266:
 
     * cost = (1/N) Sum (f(x;p) - y)^2
 
     * cost = (1/N) Sum (f(x;p) - y)^2
 
     */
 
     */
     private double cost( DenseMatrix64F param , DenseMatrix64F X , DenseMatrix64F Y)
+
     private double cost(DMatrixRMaj param , DMatrixRMaj X , DMatrixRMaj Y)
 
     {
 
     {
 
         func.compute(param,X, temp0);
 
         func.compute(param,X, temp0);
Line 282: Line 282:
 
     * @param deriv Where the jacobian will be stored
 
     * @param deriv Where the jacobian will be stored
 
     */
 
     */
     protected void computeNumericalJacobian( DenseMatrix64F param ,
+
     protected void computeNumericalJacobian( DMatrixRMaj param ,
                                             DenseMatrix64F pt ,
+
                                             DMatrixRMaj pt ,
                                             DenseMatrix64F deriv )
+
                                             DMatrixRMaj deriv )
 
     {
 
     {
 
         double invDelta = 1.0/DELTA;
 
         double invDelta = 1.0/DELTA;
Line 315: Line 315:
 
         * @param y the resulting output.
 
         * @param y the resulting output.
 
         */
 
         */
         public void compute( DenseMatrix64F param , DenseMatrix64F x , DenseMatrix64F y );
+
         public void compute(DMatrixRMaj param , DMatrixRMaj x , DMatrixRMaj y );
 
     }
 
     }
 
}
 
}
 
</syntaxhighlight>
 
</syntaxhighlight>

Revision as of 12:38, 18 May 2017

Levenberg-Marquardt is a popular non-linear optimization algorithm. This example demonstrate how a basic implementation of Levenberg-Marquardt can be created using EJML's procedural interface. Unnecessary allocation of new memory is avoided by reshaping matrices. When a matrix is reshaped its width and height is changed but new memory is not declared unless the new shape requires more memory than is available.

The algorithm is provided a function, set of inputs, set of outputs, and an initial estimate of the parameters (this often works with all zeros). It finds the parameters that minimize the difference between the computed output and the observed output. A numerical Jacobian is used to estimate the function's gradient.

Note: This is a simple straight forward implementation of Levenberg-Marquardt and is not as robust as Minpack's implementation. If you are looking for a robust non-linear least-squares minimization library in Java check out DDogleg.

External Resources:

Example Code

/**
 * <p>
 * This is a straight forward implementation of the Levenberg-Marquardt (LM) algorithm. LM is used to minimize
 * non-linear cost functions:<br>
 * <br>
 * S(P) = Sum{ i=1:m , [y<sub>i</sub> - f(x<sub>i</sub>,P)]<sup>2</sup>}<br>
 * <br>
 * where P is the set of parameters being optimized.
 * </p>
 *
 * <p>
 * In each iteration the parameters are updated using the following equations:<br>
 * <br>
 * P<sub>i+1</sub> = (H + &lambda; I)<sup>-1</sup> d <br>
 * d =  (1/N) Sum{ i=1..N , (f(x<sub>i</sub>;P<sub>i</sub>) - y<sub>i</sub>) * jacobian(:,i) } <br>
 * H =  (1/N) Sum{ i=1..N , jacobian(:,i) * jacobian(:,i)<sup>T</sup> }
 * </p>
 * <p>
 * Whenever possible the allocation of new memory is avoided.  This is accomplished by reshaping matrices.
 * A matrix that is reshaped won't grow unless the new shape requires more memory than it has available.
 * </p>
 * @author Peter Abeles
 */
public class LevenbergMarquardt {
    // how much the numerical jacobian calculation perturbs the parameters by.
    // In better implementation there are better ways to compute this delta.  See Numerical Recipes.
    private final static double DELTA = 1e-8;

    private double initialLambda;

    // the function that is optimized
    private Function func;

    // the optimized parameters and associated costs
    private DMatrixRMaj param;
    private double initialCost;
    private double finalCost;

    // used by matrix operations
    private DMatrixRMaj d;
    private DMatrixRMaj H;
    private DMatrixRMaj negDelta;
    private DMatrixRMaj tempParam;
    private DMatrixRMaj A;

    // variables used by the numerical jacobian algorithm
    private DMatrixRMaj temp0;
    private DMatrixRMaj temp1;
    // used when computing d and H variables
    private DMatrixRMaj tempDH;

    // Where the numerical Jacobian is stored.
    private DMatrixRMaj jacobian;

    /**
     * Creates a new instance that uses the provided cost function.
     *
     * @param funcCost Cost function that is being optimized.
     */
    public LevenbergMarquardt( Function funcCost )
    {
        this.initialLambda = 1;

        // declare data to some initial small size. It will grow later on as needed.
        int maxElements = 1;
        int numParam = 1;

        this.temp0 = new DMatrixRMaj(maxElements,1);
        this.temp1 = new DMatrixRMaj(maxElements,1);
        this.tempDH = new DMatrixRMaj(maxElements,1);
        this.jacobian = new DMatrixRMaj(numParam,maxElements);

        this.func = funcCost;

        this.param = new DMatrixRMaj(numParam,1);
        this.d = new DMatrixRMaj(numParam,1);
        this.H = new DMatrixRMaj(numParam,numParam);
        this.negDelta = new DMatrixRMaj(numParam,1);
        this.tempParam = new DMatrixRMaj(numParam,1);
        this.A = new DMatrixRMaj(numParam,numParam);
    }


    public double getInitialCost() {
        return initialCost;
    }

    public double getFinalCost() {
        return finalCost;
    }

    public DMatrixRMaj getParameters() {
        return param;
    }

    /**
     * Finds the best fit parameters.
     *
     * @param initParam The initial set of parameters for the function.
     * @param X The inputs to the function.
     * @param Y The "observed" output of the function
     * @return true if it succeeded and false if it did not.
     */
    public boolean optimize( DMatrixRMaj initParam ,
                             DMatrixRMaj X ,
                             DMatrixRMaj Y )
    {
        configure(initParam,X,Y);

        // save the cost of the initial parameters so that it knows if it improves or not
        initialCost = cost(param,X,Y);

        // iterate until the difference between the costs is insignificant
        // or it iterates too many times
        if( !adjustParam(X, Y, initialCost) ) {
            finalCost = Double.NaN;
            return false;
        }

        return true;
    }

    /**
     * Iterate until the difference between the costs is insignificant
     * or it iterates too many times
     */
    private boolean adjustParam(DMatrixRMaj X, DMatrixRMaj Y,
                                double prevCost) {
        // lambda adjusts how big of a step it takes
        double lambda = initialLambda;
        // the difference between the current and previous cost
        double difference = 1000;

        for( int iter = 0; iter < 20 || difference < 1e-6 ; iter++ ) {
            // compute some variables based on the gradient
            computeDandH(param,X,Y);

            // try various step sizes and see if any of them improve the
            // results over what has already been done
            boolean foundBetter = false;
            for( int i = 0; i < 5; i++ ) {
                computeA(A,H,lambda);

                if( !solve(A,d,negDelta) ) {
                    return false;
                }
                // compute the candidate parameters
                subtract(param, negDelta, tempParam);

                double cost = cost(tempParam,X,Y);
                if( cost < prevCost ) {
                    // the candidate parameters produced better results so use it
                    foundBetter = true;
                    param.set(tempParam);
                    difference = prevCost - cost;
                    prevCost = cost;
                    lambda /= 10.0;
                } else {
                    lambda *= 10.0;
                }
            }

            // it reached a point where it can't improve so exit
            if( !foundBetter )
                break;
        }
        finalCost = prevCost;
        return true;
    }

    /**
     * Performs sanity checks on the input data and reshapes internal matrices.  By reshaping
     * a matrix it will only declare new memory when needed.
     */
    protected void configure(DMatrixRMaj initParam , DMatrixRMaj X , DMatrixRMaj Y )
    {
        if( Y.getNumRows() != X.getNumRows() ) {
            throw new IllegalArgumentException("Different vector lengths");
        } else if( Y.getNumCols() != 1 || X.getNumCols() != 1 ) {
            throw new IllegalArgumentException("Inputs must be a column vector");
        }

        int numParam = initParam.getNumElements();
        int numPoints = Y.getNumRows();

        if( param.getNumElements() != initParam.getNumElements() ) {
            // reshaping a matrix means that new memory is only declared when needed
            this.param.reshape(numParam,1, false);
            this.d.reshape(numParam,1, false);
            this.H.reshape(numParam,numParam, false);
            this.negDelta.reshape(numParam,1, false);
            this.tempParam.reshape(numParam,1, false);
            this.A.reshape(numParam,numParam, false);
        }

        param.set(initParam);

        // reshaping a matrix means that new memory is only declared when needed
        temp0.reshape(numPoints,1, false);
        temp1.reshape(numPoints,1, false);
        tempDH.reshape(numPoints,1, false);
        jacobian.reshape(numParam,numPoints, false);


    }

    /**
     * Computes the d and H parameters.  Where d is the average error gradient and
     * H is an approximation of the hessian.
     */
    private void computeDandH(DMatrixRMaj param , DMatrixRMaj x , DMatrixRMaj y )
    {
        func.compute(param,x, tempDH);
        subtractEquals(tempDH, y);

        computeNumericalJacobian(param,x,jacobian);

        int numParam = param.getNumElements();
        int length = x.getNumElements();

        // d = average{ (f(x_i;p) - y_i) * jacobian(:,i) }
        for( int i = 0; i < numParam; i++ ) {
            double total = 0;
            for( int j = 0; j < length; j++ ) {
                total += tempDH.get(j,0)*jacobian.get(i,j);
            }
            d.set(i,0,total/length);
        }

        // compute the approximation of the hessian
        multTransB(jacobian,jacobian,H);
        scale(1.0/length,H);
    }

    /**
     * A = H + lambda*I <br>
     * <br>
     * where I is an identity matrix.
     */
    private void computeA(DMatrixRMaj A , DMatrixRMaj H , double lambda )
    {
        final int numParam = param.getNumElements();

        A.set(H);
        for( int i = 0; i < numParam; i++ ) {
            A.set(i,i, A.get(i,i) + lambda);
        }
    }

    /**
     * Computes the "cost" for the parameters given.
     *
     * cost = (1/N) Sum (f(x;p) - y)^2
     */
    private double cost(DMatrixRMaj param , DMatrixRMaj X , DMatrixRMaj Y)
    {
        func.compute(param,X, temp0);

        double error = diffNormF(temp0,Y);

        return error*error / (double)X.numRows;
    }

    /**
     * Computes a simple numerical Jacobian.
     *
     * @param param The set of parameters that the Jacobian is to be computed at.
     * @param pt The point around which the Jacobian is to be computed.
     * @param deriv Where the jacobian will be stored
     */
    protected void computeNumericalJacobian( DMatrixRMaj param ,
                                             DMatrixRMaj pt ,
                                             DMatrixRMaj deriv )
    {
        double invDelta = 1.0/DELTA;

        func.compute(param,pt, temp0);

        // compute the jacobian by perturbing the parameters slightly
        // then seeing how it effects the results.
        for( int i = 0; i < param.numRows; i++ ) {
            param.data[i] += DELTA;
            func.compute(param,pt, temp1);
            // compute the difference between the two parameters and divide by the delta
            add(invDelta,temp1,-invDelta,temp0,temp1);
            // copy the results into the jacobian matrix
            System.arraycopy(temp1.data,0,deriv.data,i*pt.numRows,pt.numRows);

            param.data[i] -= DELTA;
        }
    }

    /**
     * The function that is being optimized.
     */
    public interface Function {
        /**
         * Computes the output for each value in matrix x given the set of parameters.
         *
         * @param param The parameter for the function.
         * @param x the input points.
         * @param y the resulting output.
         */
        public void compute(DMatrixRMaj param , DMatrixRMaj x , DMatrixRMaj y );
    }
}