Difference between revisions of "Example Levenberg-Marquardt"
Line 36: | Line 36: | ||
*/ | */ | ||
public class LevenbergMarquardt { | public class LevenbergMarquardt { | ||
+ | // Convergence criteria | ||
+ | private int maxIterations = 100; | ||
+ | private double ftol = 1e-12; | ||
+ | private double gtol = 1e-12; | ||
+ | |||
// how much the numerical jacobian calculation perturbs the parameters by. | // how much the numerical jacobian calculation perturbs the parameters by. | ||
// In better implementation there are better ways to compute this delta. See Numerical Recipes. | // In better implementation there are better ways to compute this delta. See Numerical Recipes. | ||
private final static double DELTA = 1e-8; | private final static double DELTA = 1e-8; | ||
+ | // Dampening. Larger values means it's more like gradient descent | ||
private double initialLambda; | private double initialLambda; | ||
// the function that is optimized | // the function that is optimized | ||
− | private | + | private ResidualFunction function; |
// the optimized parameters and associated costs | // the optimized parameters and associated costs | ||
− | private DMatrixRMaj | + | private DMatrixRMaj candidateParameters = new DMatrixRMaj(1,1); |
private double initialCost; | private double initialCost; | ||
private double finalCost; | private double finalCost; | ||
// used by matrix operations | // used by matrix operations | ||
− | private DMatrixRMaj | + | private DMatrixRMaj g = new DMatrixRMaj(1,1); // gradient |
− | private DMatrixRMaj H; | + | private DMatrixRMaj H = new DMatrixRMaj(1,1); // Hessian approximation |
− | private DMatrixRMaj | + | private DMatrixRMaj Hdiag = new DMatrixRMaj(1,1); |
− | private DMatrixRMaj | + | private DMatrixRMaj negativeStep = new DMatrixRMaj(1,1); |
− | |||
// variables used by the numerical jacobian algorithm | // variables used by the numerical jacobian algorithm | ||
− | private DMatrixRMaj temp0; | + | private DMatrixRMaj temp0 = new DMatrixRMaj(1,1); |
− | private DMatrixRMaj temp1; | + | private DMatrixRMaj temp1 = new DMatrixRMaj(1,1); |
// used when computing d and H variables | // used when computing d and H variables | ||
− | private DMatrixRMaj | + | private DMatrixRMaj residuals = new DMatrixRMaj(1,1); |
// Where the numerical Jacobian is stored. | // Where the numerical Jacobian is stored. | ||
− | private DMatrixRMaj jacobian | + | private DMatrixRMaj jacobian = new DMatrixRMaj(1,1); |
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
public double getInitialCost() { | public double getInitialCost() { | ||
Line 103: | Line 79: | ||
} | } | ||
− | public | + | /** |
− | + | * | |
+ | * @param initialLambda Initial value of dampening parameter. Try 1 to start | ||
+ | */ | ||
+ | public LevenbergMarquardt(double initialLambda) { | ||
+ | this.initialLambda = initialLambda; | ||
+ | } | ||
+ | |||
+ | /** | ||
+ | * Specifies convergence criteria | ||
+ | * | ||
+ | * @param maxIterations Maximum number of iterations | ||
+ | * @param ftol convergence based on change in function value. try 1e-12 | ||
+ | * @param gtol convergence based on residual magnitude. Try 1e-12 | ||
+ | */ | ||
+ | public void setConvergence( int maxIterations , double ftol , double gtol ) { | ||
+ | this.maxIterations = maxIterations; | ||
+ | this.ftol = ftol; | ||
+ | this.gtol = gtol; | ||
} | } | ||
Line 110: | Line 103: | ||
* Finds the best fit parameters. | * Finds the best fit parameters. | ||
* | * | ||
− | * @param | + | * @param function The function being optimized |
− | + | * @param parameters (Input/Output) initial parameter estimate and storage for optimized parameters | |
− | * @param | ||
* @return true if it succeeded and false if it did not. | * @return true if it succeeded and false if it did not. | ||
*/ | */ | ||
− | public boolean optimize( | + | public boolean optimize(ResidualFunction function, DMatrixRMaj parameters ) |
− | |||
− | |||
{ | { | ||
− | configure( | + | configure(function,parameters.getNumElements()); |
// save the cost of the initial parameters so that it knows if it improves or not | // save the cost of the initial parameters so that it knows if it improves or not | ||
− | initialCost = cost( | + | double previousCost = initialCost = cost(parameters); |
// iterate until the difference between the costs is insignificant | // iterate until the difference between the costs is insignificant | ||
− | // or | + | double lambda = initialLambda; |
− | if( | + | |
− | + | // if it should recompute the Jacobian in this iteration or not | |
− | + | boolean computeHessian = true; | |
− | + | ||
+ | for( int iter = 0; iter < maxIterations; iter++ ) { | ||
+ | if( computeHessian ) { | ||
+ | // compute some variables based on the gradient | ||
+ | computeGradientAndHessian(parameters); | ||
+ | computeHessian = false; | ||
+ | |||
+ | // check for convergence using gradient test | ||
+ | boolean converged = true; | ||
+ | for (int i = 0; i < g.getNumElements(); i++) { | ||
+ | if( Math.abs(g.data[i]) > gtol ) { | ||
+ | converged = false; | ||
+ | break; | ||
+ | } | ||
+ | } | ||
+ | if( converged ) | ||
+ | return true; | ||
+ | } | ||
+ | |||
+ | // H = H + lambda*I | ||
+ | for (int i = 0; i < H.numRows; i++) { | ||
+ | H.set(i,i, Hdiag.get(i) + lambda); | ||
+ | } | ||
− | + | // In robust implementations failure to solve is handled much better | |
− | + | if( !CommonOps_DDRM.solve(H, g, negativeStep) ) { | |
+ | return false; | ||
+ | } | ||
− | + | // compute the candidate parameters | |
− | + | CommonOps_DDRM.subtract(parameters, negativeStep, candidateParameters); | |
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | + | double cost = cost(candidateParameters); | |
− | + | if( cost <= previousCost ) { | |
− | + | // the candidate parameters produced better results so use it | |
+ | computeHessian = true; | ||
+ | parameters.set(candidateParameters); | ||
− | + | // check for convergence | |
− | + | // ftol <= (cost(k) - cost(k+1))/cost(k) | |
− | + | boolean converged = ftol*previousCost >= previousCost-cost; | |
− | |||
− | |||
− | + | previousCost = cost; | |
− | + | lambda /= 10.0; | |
− | |||
− | |||
− | |||
− | + | if( converged ) { | |
− | if( | + | return true; |
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
} | } | ||
+ | } else { | ||
+ | lambda *= 10.0; | ||
} | } | ||
− | |||
− | |||
− | |||
} | } | ||
− | finalCost = | + | finalCost = previousCost; |
return true; | return true; | ||
} | } | ||
Line 186: | Line 180: | ||
* a matrix it will only declare new memory when needed. | * a matrix it will only declare new memory when needed. | ||
*/ | */ | ||
− | protected void configure( | + | protected void configure(ResidualFunction function , int numParam ) |
{ | { | ||
− | + | this.function = function; | |
− | + | int numFunctions = function.numFunctions(); | |
− | |||
− | |||
− | |||
− | |||
− | |||
− | int | ||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
// reshaping a matrix means that new memory is only declared when needed | // reshaping a matrix means that new memory is only declared when needed | ||
− | + | candidateParameters.reshape(numParam,1); | |
− | + | g.reshape(numParam,1); | |
− | + | H.reshape(numParam,numParam); | |
− | + | negativeStep.reshape(numParam,1); | |
− | |||
+ | // Normally these variables are thought of as row vectors, but it works out easier if they are column | ||
+ | temp0.reshape(numFunctions,1); | ||
+ | temp1.reshape(numFunctions,1); | ||
+ | residuals.reshape(numFunctions,1); | ||
+ | jacobian.reshape(numFunctions,numParam); | ||
} | } | ||
/** | /** | ||
− | * Computes the d and H parameters. | + | * Computes the d and H parameters. |
− | * H | + | * |
+ | * d = J'*(f(x)-y) <--- that's also the gradient | ||
+ | * H = J'*J | ||
*/ | */ | ||
− | private void | + | private void computeGradientAndHessian(DMatrixRMaj param ) |
{ | { | ||
− | + | // residuals = f(x) - y | |
− | + | function.compute(param, residuals); | |
− | |||
− | |||
− | + | computeNumericalJacobian(param,jacobian); | |
− | |||
− | + | CommonOps_DDRM.multTransA(jacobian, residuals, g); | |
− | + | CommonOps_DDRM.multTransA(jacobian, jacobian, H); | |
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | + | CommonOps_DDRM.extractDiag(H,Hdiag); | |
− | |||
− | |||
} | } | ||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
/** | /** | ||
* Computes the "cost" for the parameters given. | * Computes the "cost" for the parameters given. | ||
* | * | ||
− | * cost = (1/N) Sum (f(x | + | * cost = (1/N) Sum (f(x) - y)^2 |
*/ | */ | ||
− | private double cost(DMatrixRMaj param | + | private double cost(DMatrixRMaj param ) |
{ | { | ||
− | + | function.compute(param, residuals); | |
− | double error = | + | double error = NormOps_DDRM.normF(residuals); |
− | return error*error / (double) | + | return error*error / (double)residuals.numRows; |
} | } | ||
Line 278: | Line 235: | ||
* Computes a simple numerical Jacobian. | * Computes a simple numerical Jacobian. | ||
* | * | ||
− | * @param param The set of parameters that the Jacobian is to be computed at. | + | * @param param (input) The set of parameters that the Jacobian is to be computed at. |
− | * @param | + | * @param jacobian (output) Where the jacobian will be stored |
− | |||
*/ | */ | ||
protected void computeNumericalJacobian( DMatrixRMaj param , | protected void computeNumericalJacobian( DMatrixRMaj param , | ||
− | DMatrixRMaj | + | DMatrixRMaj jacobian ) |
− | |||
{ | { | ||
double invDelta = 1.0/DELTA; | double invDelta = 1.0/DELTA; | ||
− | + | function.compute(param, temp0); | |
// compute the jacobian by perturbing the parameters slightly | // compute the jacobian by perturbing the parameters slightly | ||
// then seeing how it effects the results. | // then seeing how it effects the results. | ||
− | for( int i = 0; i < param. | + | for( int i = 0; i < param.getNumElements(); i++ ) { |
param.data[i] += DELTA; | param.data[i] += DELTA; | ||
− | + | function.compute(param, temp1); | |
// compute the difference between the two parameters and divide by the delta | // compute the difference between the two parameters and divide by the delta | ||
− | add(invDelta,temp1,-invDelta,temp0,temp1); | + | // temp1 = (temp1 - temp0)/delta |
+ | CommonOps_DDRM.add(invDelta,temp1,-invDelta,temp0,temp1); | ||
+ | |||
// copy the results into the jacobian matrix | // copy the results into the jacobian matrix | ||
− | + | // J(i,:) = temp1 | |
+ | CommonOps_DDRM.insert(temp1,jacobian,0,i); | ||
param.data[i] -= DELTA; | param.data[i] -= DELTA; | ||
Line 305: | Line 263: | ||
/** | /** | ||
− | * The function that is being optimized. | + | * The function that is being optimized. Returns the residual. f(x) - y |
*/ | */ | ||
− | public interface | + | public interface ResidualFunction { |
/** | /** | ||
− | * Computes the | + | * Computes the residual vector given the set of input parameters |
+ | * Function which goes from N input to M outputs | ||
* | * | ||
− | * @param param | + | * @param param (Input) N by 1 parameter vector |
− | * @param x | + | * @param residual (Output) M by 1 output vector to store the residual = f(x)-y |
− | * | + | */ |
+ | void compute(DMatrixRMaj param , DMatrixRMaj residual ); | ||
+ | |||
+ | /** | ||
+ | * Number of functions in output | ||
+ | * @return function count | ||
*/ | */ | ||
− | + | int numFunctions(); | |
} | } | ||
} | } | ||
</syntaxhighlight> | </syntaxhighlight> |
Revision as of 05:14, 24 August 2018
Levenberg-Marquardt is a popular non-linear optimization algorithm. This example demonstrate how a basic implementation of Levenberg-Marquardt can be created using EJML's procedural interface. Unnecessary allocation of new memory is avoided by reshaping matrices. When a matrix is reshaped its width and height is changed but new memory is not declared unless the new shape requires more memory than is available.
The algorithm is provided a function, set of inputs, set of outputs, and an initial estimate of the parameters (this often works with all zeros). It finds the parameters that minimize the difference between the computed output and the observed output. A numerical Jacobian is used to estimate the function's gradient.
Note: This is a simple straight forward implementation of Levenberg-Marquardt and is not as robust as Minpack's implementation. If you are looking for a robust non-linear least-squares minimization library in Java check out DDogleg.
External Resources:
- LevenbergMarquardt.java code
- <disqus>Discuss this example</disqus>
Example Code
/**
* <p>
* This is a straight forward implementation of the Levenberg-Marquardt (LM) algorithm. LM is used to minimize
* non-linear cost functions:<br>
* <br>
* S(P) = Sum{ i=1:m , [y<sub>i</sub> - f(x<sub>i</sub>,P)]<sup>2</sup>}<br>
* <br>
* where P is the set of parameters being optimized.
* </p>
*
* <p>
* In each iteration the parameters are updated using the following equations:<br>
* <br>
* P<sub>i+1</sub> = (H + λ I)<sup>-1</sup> d <br>
* d = (1/N) Sum{ i=1..N , (f(x<sub>i</sub>;P<sub>i</sub>) - y<sub>i</sub>) * jacobian(:,i) } <br>
* H = (1/N) Sum{ i=1..N , jacobian(:,i) * jacobian(:,i)<sup>T</sup> }
* </p>
* <p>
* Whenever possible the allocation of new memory is avoided. This is accomplished by reshaping matrices.
* A matrix that is reshaped won't grow unless the new shape requires more memory than it has available.
* </p>
* @author Peter Abeles
*/
public class LevenbergMarquardt {
// Convergence criteria
private int maxIterations = 100;
private double ftol = 1e-12;
private double gtol = 1e-12;
// how much the numerical jacobian calculation perturbs the parameters by.
// In better implementation there are better ways to compute this delta. See Numerical Recipes.
private final static double DELTA = 1e-8;
// Dampening. Larger values means it's more like gradient descent
private double initialLambda;
// the function that is optimized
private ResidualFunction function;
// the optimized parameters and associated costs
private DMatrixRMaj candidateParameters = new DMatrixRMaj(1,1);
private double initialCost;
private double finalCost;
// used by matrix operations
private DMatrixRMaj g = new DMatrixRMaj(1,1); // gradient
private DMatrixRMaj H = new DMatrixRMaj(1,1); // Hessian approximation
private DMatrixRMaj Hdiag = new DMatrixRMaj(1,1);
private DMatrixRMaj negativeStep = new DMatrixRMaj(1,1);
// variables used by the numerical jacobian algorithm
private DMatrixRMaj temp0 = new DMatrixRMaj(1,1);
private DMatrixRMaj temp1 = new DMatrixRMaj(1,1);
// used when computing d and H variables
private DMatrixRMaj residuals = new DMatrixRMaj(1,1);
// Where the numerical Jacobian is stored.
private DMatrixRMaj jacobian = new DMatrixRMaj(1,1);
public double getInitialCost() {
return initialCost;
}
public double getFinalCost() {
return finalCost;
}
/**
*
* @param initialLambda Initial value of dampening parameter. Try 1 to start
*/
public LevenbergMarquardt(double initialLambda) {
this.initialLambda = initialLambda;
}
/**
* Specifies convergence criteria
*
* @param maxIterations Maximum number of iterations
* @param ftol convergence based on change in function value. try 1e-12
* @param gtol convergence based on residual magnitude. Try 1e-12
*/
public void setConvergence( int maxIterations , double ftol , double gtol ) {
this.maxIterations = maxIterations;
this.ftol = ftol;
this.gtol = gtol;
}
/**
* Finds the best fit parameters.
*
* @param function The function being optimized
* @param parameters (Input/Output) initial parameter estimate and storage for optimized parameters
* @return true if it succeeded and false if it did not.
*/
public boolean optimize(ResidualFunction function, DMatrixRMaj parameters )
{
configure(function,parameters.getNumElements());
// save the cost of the initial parameters so that it knows if it improves or not
double previousCost = initialCost = cost(parameters);
// iterate until the difference between the costs is insignificant
double lambda = initialLambda;
// if it should recompute the Jacobian in this iteration or not
boolean computeHessian = true;
for( int iter = 0; iter < maxIterations; iter++ ) {
if( computeHessian ) {
// compute some variables based on the gradient
computeGradientAndHessian(parameters);
computeHessian = false;
// check for convergence using gradient test
boolean converged = true;
for (int i = 0; i < g.getNumElements(); i++) {
if( Math.abs(g.data[i]) > gtol ) {
converged = false;
break;
}
}
if( converged )
return true;
}
// H = H + lambda*I
for (int i = 0; i < H.numRows; i++) {
H.set(i,i, Hdiag.get(i) + lambda);
}
// In robust implementations failure to solve is handled much better
if( !CommonOps_DDRM.solve(H, g, negativeStep) ) {
return false;
}
// compute the candidate parameters
CommonOps_DDRM.subtract(parameters, negativeStep, candidateParameters);
double cost = cost(candidateParameters);
if( cost <= previousCost ) {
// the candidate parameters produced better results so use it
computeHessian = true;
parameters.set(candidateParameters);
// check for convergence
// ftol <= (cost(k) - cost(k+1))/cost(k)
boolean converged = ftol*previousCost >= previousCost-cost;
previousCost = cost;
lambda /= 10.0;
if( converged ) {
return true;
}
} else {
lambda *= 10.0;
}
}
finalCost = previousCost;
return true;
}
/**
* Performs sanity checks on the input data and reshapes internal matrices. By reshaping
* a matrix it will only declare new memory when needed.
*/
protected void configure(ResidualFunction function , int numParam )
{
this.function = function;
int numFunctions = function.numFunctions();
// reshaping a matrix means that new memory is only declared when needed
candidateParameters.reshape(numParam,1);
g.reshape(numParam,1);
H.reshape(numParam,numParam);
negativeStep.reshape(numParam,1);
// Normally these variables are thought of as row vectors, but it works out easier if they are column
temp0.reshape(numFunctions,1);
temp1.reshape(numFunctions,1);
residuals.reshape(numFunctions,1);
jacobian.reshape(numFunctions,numParam);
}
/**
* Computes the d and H parameters.
*
* d = J'*(f(x)-y) <--- that's also the gradient
* H = J'*J
*/
private void computeGradientAndHessian(DMatrixRMaj param )
{
// residuals = f(x) - y
function.compute(param, residuals);
computeNumericalJacobian(param,jacobian);
CommonOps_DDRM.multTransA(jacobian, residuals, g);
CommonOps_DDRM.multTransA(jacobian, jacobian, H);
CommonOps_DDRM.extractDiag(H,Hdiag);
}
/**
* Computes the "cost" for the parameters given.
*
* cost = (1/N) Sum (f(x) - y)^2
*/
private double cost(DMatrixRMaj param )
{
function.compute(param, residuals);
double error = NormOps_DDRM.normF(residuals);
return error*error / (double)residuals.numRows;
}
/**
* Computes a simple numerical Jacobian.
*
* @param param (input) The set of parameters that the Jacobian is to be computed at.
* @param jacobian (output) Where the jacobian will be stored
*/
protected void computeNumericalJacobian( DMatrixRMaj param ,
DMatrixRMaj jacobian )
{
double invDelta = 1.0/DELTA;
function.compute(param, temp0);
// compute the jacobian by perturbing the parameters slightly
// then seeing how it effects the results.
for( int i = 0; i < param.getNumElements(); i++ ) {
param.data[i] += DELTA;
function.compute(param, temp1);
// compute the difference between the two parameters and divide by the delta
// temp1 = (temp1 - temp0)/delta
CommonOps_DDRM.add(invDelta,temp1,-invDelta,temp0,temp1);
// copy the results into the jacobian matrix
// J(i,:) = temp1
CommonOps_DDRM.insert(temp1,jacobian,0,i);
param.data[i] -= DELTA;
}
}
/**
* The function that is being optimized. Returns the residual. f(x) - y
*/
public interface ResidualFunction {
/**
* Computes the residual vector given the set of input parameters
* Function which goes from N input to M outputs
*
* @param param (Input) N by 1 parameter vector
* @param residual (Output) M by 1 output vector to store the residual = f(x)-y
*/
void compute(DMatrixRMaj param , DMatrixRMaj residual );
/**
* Number of functions in output
* @return function count
*/
int numFunctions();
}
}