Example Levenberg-Marquardt
Levenberg-Marquardt (LM) is a popular non-linear optimization algorithm. This example demonstrates how a basic implementation of Levenberg-Marquardt can be created using EJML's procedural interface. Unnecessary allocation of new memory is avoided by reshaping matrices. When a matrix is reshaped its width and height are changed, but new memory is not declared unless the new shape requires more memory than is available.
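For example, this short snippet (a hypothetical sketch, not part of the example class, assuming org.ejml.data.DMatrixRMaj is imported) illustrates the reshape behavior:

DMatrixRMaj m = new DMatrixRMaj(1, 1);
m.reshape(10, 10); // the internal array grows to hold 100 elements
m.reshape(3, 3);   // the smaller shape reuses the existing array; nothing new is allocated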
LM works by being provided a function which computes the residual error. Residual error is defined as the difference between the predicted output and the actual observed output, e.g. f(x)-y. Optimization works by finding a set of parameters which minimizes the magnitude of the residuals, as measured by the Euclidean (L2) norm.
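As an illustration, consider fitting a line y = m*x + b to observed data: the residual function fills the output vector with f(x_i) - y_i for each observation. Below is a minimal sketch of the compute() method from the ResidualFunction interface defined in the example code further down; xData and yData are hypothetical arrays holding the observations.

public void compute( DMatrixRMaj param, DMatrixRMaj residual ) {
    double m = param.get(0), b = param.get(1);
    for (int i = 0; i < xData.length; i++) {
        residual.set(i, 0, m*xData[i] + b - yData[i]); // f(x_i) - y_i
    }
}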
Note: This is a simple, straightforward implementation of Levenberg-Marquardt and is not as robust as Minpack's implementation. If you are looking for a robust non-linear least-squares minimization library in Java, check out DDogleg.
External Resources:
- LevenbergMarquardt.java code
Example Code
/**
* <p>
 * This is a straightforward implementation of the Levenberg-Marquardt (LM) algorithm. LM is used to minimize
* non-linear cost functions:<br>
* <br>
* S(P) = Sum{ i=1:m , [y<sub>i</sub> - f(x<sub>i</sub>,P)]<sup>2</sup>}<br>
* <br>
* where P is the set of parameters being optimized.
* </p>
*
* <p>
* In each iteration the parameters are updated using the following equations:<br>
* <br>
 * P<sub>i+1</sub> = P<sub>i</sub> - (H + λ I)<sup>-1</sup> d <br>
 * d = J<sup>T</sup> [f(x;P<sub>i</sub>) - y] <br>
 * H = J<sup>T</sup> J
* </p>
* <p>
* Whenever possible the allocation of new memory is avoided. This is accomplished by reshaping matrices.
* A matrix that is reshaped won't grow unless the new shape requires more memory than it has available.
* </p>
*
* @author Peter Abeles
*/
public class LevenbergMarquardt {
// Convergence criteria
private int maxIterations = 100;
private double ftol = 1e-12;
private double gtol = 1e-12;
    // How much the numerical Jacobian calculation perturbs the parameters by.
    // 1e-8 is roughly the square root of machine epsilon for doubles, a common forward-difference step size.
    // Better implementations compute this delta adaptively. See Numerical Recipes.
private final static double DELTA = 1e-8;
    // Damping. Larger values make the step behave more like gradient descent
private double initialLambda;
// the function that is optimized
private ResidualFunction function;
// the optimized parameters and associated costs
private DMatrixRMaj candidateParameters = new DMatrixRMaj(1, 1);
private double initialCost;
private double finalCost;
// used by matrix operations
private DMatrixRMaj g = new DMatrixRMaj(1, 1); // gradient
private DMatrixRMaj H = new DMatrixRMaj(1, 1); // Hessian approximation
private DMatrixRMaj Hdiag = new DMatrixRMaj(1, 1);
private DMatrixRMaj negativeStep = new DMatrixRMaj(1, 1);
// variables used by the numerical jacobian algorithm
private DMatrixRMaj temp0 = new DMatrixRMaj(1, 1);
private DMatrixRMaj temp1 = new DMatrixRMaj(1, 1);
// used when computing d and H variables
private DMatrixRMaj residuals = new DMatrixRMaj(1, 1);
// Where the numerical Jacobian is stored.
private DMatrixRMaj jacobian = new DMatrixRMaj(1, 1);
public double getInitialCost() {
return initialCost;
}
public double getFinalCost() {
return finalCost;
}
/**
     * @param initialLambda Initial value of the damping parameter. Try 1 to start
*/
public LevenbergMarquardt( double initialLambda ) {
this.initialLambda = initialLambda;
}
/**
* Specifies convergence criteria
*
* @param maxIterations Maximum number of iterations
     * @param ftol convergence based on change in function value. Try 1e-12
* @param gtol convergence based on residual magnitude. Try 1e-12
*/
public void setConvergence( int maxIterations, double ftol, double gtol ) {
this.maxIterations = maxIterations;
this.ftol = ftol;
this.gtol = gtol;
}
/**
* Finds the best fit parameters.
*
* @param function The function being optimized
* @param parameters (Input/Output) initial parameter estimate and storage for optimized parameters
* @return true if it succeeded and false if it did not.
*/
public boolean optimize( ResidualFunction function, DMatrixRMaj parameters ) {
configure(function, parameters.getNumElements());
// save the cost of the initial parameters so that it knows if it improves or not
double previousCost = initialCost = cost(parameters);
// iterate until the difference between the costs is insignificant
double lambda = initialLambda;
// if it should recompute the Jacobian in this iteration or not
boolean computeHessian = true;
for (int iter = 0; iter < maxIterations; iter++) {
if (computeHessian) {
// compute some variables based on the gradient
computeGradientAndHessian(parameters);
computeHessian = false;
// check for convergence using gradient test
boolean converged = true;
for (int i = 0; i < g.getNumElements(); i++) {
if (Math.abs(g.data[i]) > gtol) {
converged = false;
break;
}
}
                if (converged) {
                    finalCost = previousCost;
                    return true;
                }
}
// H = H + lambda*I
for (int i = 0; i < H.numRows; i++) {
H.set(i, i, Hdiag.get(i) + lambda);
}
// In robust implementations failure to solve is handled much better
if (!CommonOps_DDRM.solve(H, g, negativeStep)) {
return false;
}
// compute the candidate parameters
CommonOps_DDRM.subtract(parameters, negativeStep, candidateParameters);
double cost = cost(candidateParameters);
if (cost <= previousCost) {
// the candidate parameters produced better results so use it
computeHessian = true;
parameters.setTo(candidateParameters);
// check for convergence
// ftol <= (cost(k) - cost(k+1))/cost(k)
boolean converged = ftol*previousCost >= previousCost - cost;
previousCost = cost;
lambda /= 10.0;
                if (converged) {
                    finalCost = previousCost;
                    return true;
                }
} else {
lambda *= 10.0;
}
}
finalCost = previousCost;
return true;
}
/**
* Performs sanity checks on the input data and reshapes internal matrices. By reshaping
* a matrix it will only declare new memory when needed.
*/
protected void configure( ResidualFunction function, int numParam ) {
this.function = function;
int numFunctions = function.numFunctions();
// reshaping a matrix means that new memory is only declared when needed
candidateParameters.reshape(numParam, 1);
g.reshape(numParam, 1);
H.reshape(numParam, numParam);
negativeStep.reshape(numParam, 1);
// Normally these variables are thought of as row vectors, but it works out easier if they are column
temp0.reshape(numFunctions, 1);
temp1.reshape(numFunctions, 1);
residuals.reshape(numFunctions, 1);
jacobian.reshape(numFunctions, numParam);
}
/**
* Computes the d and H parameters.
*
* d = J'*(f(x)-y) <--- that's also the gradient
* H = J'*J
*/
private void computeGradientAndHessian( DMatrixRMaj param ) {
// residuals = f(x) - y
function.compute(param, residuals);
computeNumericalJacobian(param, jacobian);
CommonOps_DDRM.multTransA(jacobian, residuals, g);
CommonOps_DDRM.multTransA(jacobian, jacobian, H);
CommonOps_DDRM.extractDiag(H, Hdiag);
}
/**
* Computes the "cost" for the parameters given.
*
* cost = (1/N) Sum (f(x) - y)^2
*/
private double cost( DMatrixRMaj param ) {
function.compute(param, residuals);
double error = NormOps_DDRM.normF(residuals);
return error*error/(double)residuals.numRows;
}
/**
* Computes a simple numerical Jacobian.
*
* @param param (input) The set of parameters that the Jacobian is to be computed at.
* @param jacobian (output) Where the jacobian will be stored
*/
protected void computeNumericalJacobian( DMatrixRMaj param,
DMatrixRMaj jacobian ) {
double invDelta = 1.0/DELTA;
function.compute(param, temp0);
// compute the jacobian by perturbing the parameters slightly
        // then seeing how it affects the results.
for (int i = 0; i < param.getNumElements(); i++) {
param.data[i] += DELTA;
function.compute(param, temp1);
            // compute the difference between the two function evaluations and divide by the delta
// temp1 = (temp1 - temp0)/delta
CommonOps_DDRM.add(invDelta, temp1, -invDelta, temp0, temp1);
// copy the results into the jacobian matrix
// J(i,:) = temp1
CommonOps_DDRM.insert(temp1, jacobian, 0, i);
param.data[i] -= DELTA;
}
}
/**
     * The function that is being optimized. Computes the residual, f(x) - y.
*/
public interface ResidualFunction {
/**
         * Computes the residual vector given the set of input parameters.
         * The function maps N input parameters to M outputs.
*
* @param param (Input) N by 1 parameter vector
* @param residual (Output) M by 1 output vector to store the residual = f(x)-y
*/
void compute( DMatrixRMaj param, DMatrixRMaj residual );
/**
* Number of functions in output
*
* @return function count
*/
int numFunctions();
}
}
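Below is a possible usage sketch. The data, model (y = a*exp(b*x)), and initial guess are hypothetical, chosen only to illustrate the API; it assumes the same imports as the class above (org.ejml.data.DMatrixRMaj).

// Hypothetical observations of y = a*exp(b*x)
double[] x = {0, 1, 2, 3, 4};
double[] y = {1.0, 1.6, 2.7, 4.6, 7.4};

LevenbergMarquardt.ResidualFunction func = new LevenbergMarquardt.ResidualFunction() {
    @Override public void compute( DMatrixRMaj param, DMatrixRMaj residual ) {
        double a = param.get(0), b = param.get(1);
        for (int i = 0; i < x.length; i++) {
            residual.set(i, 0, a*Math.exp(b*x[i]) - y[i]); // f(x_i) - y_i
        }
    }

    @Override public int numFunctions() { return x.length; } // M = number of observations
};

LevenbergMarquardt lm = new LevenbergMarquardt(1.0); // initial lambda, as suggested above
DMatrixRMaj parameters = new DMatrixRMaj(2, 1, true, 1.0, 0.1); // initial guess for (a, b)
if (lm.optimize(func, parameters)) {
    System.out.println("cost: " + lm.getInitialCost() + " -> " + lm.getFinalCost());
}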