Difference between revisions of "Example Levenberg-Marquardt"
Line 6: | Line 6: | ||
External Resources: | External Resources: | ||
− | * [https://github.com/lessthanoptimal/ejml/blob/v0. | + | * [https://github.com/lessthanoptimal/ejml/blob/v0.31/examples/src/org/ejml/example/LevenbergMarquardt.java LevenbergMarquardt.java code] |
* <disqus>Discuss this example</disqus> | * <disqus>Discuss this example</disqus> | ||
Line 46: | Line 46: | ||
// the optimized parameters and associated costs | // the optimized parameters and associated costs | ||
− | private | + | private DMatrixRMaj param; |
private double initialCost; | private double initialCost; | ||
private double finalCost; | private double finalCost; | ||
// used by matrix operations | // used by matrix operations | ||
− | private | + | private DMatrixRMaj d; |
− | private | + | private DMatrixRMaj H; |
− | private | + | private DMatrixRMaj negDelta; |
− | private | + | private DMatrixRMaj tempParam; |
− | private | + | private DMatrixRMaj A; |
// variables used by the numerical jacobian algorithm | // variables used by the numerical jacobian algorithm | ||
− | private | + | private DMatrixRMaj temp0; |
− | private | + | private DMatrixRMaj temp1; |
// used when computing d and H variables | // used when computing d and H variables | ||
− | private | + | private DMatrixRMaj tempDH; |
// Where the numerical Jacobian is stored. | // Where the numerical Jacobian is stored. | ||
− | private | + | private DMatrixRMaj jacobian; |
/** | /** | ||
Line 79: | Line 79: | ||
int numParam = 1; | int numParam = 1; | ||
− | this.temp0 = new | + | this.temp0 = new DMatrixRMaj(maxElements,1); |
− | this.temp1 = new | + | this.temp1 = new DMatrixRMaj(maxElements,1); |
− | this.tempDH = new | + | this.tempDH = new DMatrixRMaj(maxElements,1); |
− | this.jacobian = new | + | this.jacobian = new DMatrixRMaj(numParam,maxElements); |
this.func = funcCost; | this.func = funcCost; | ||
− | this.param = new | + | this.param = new DMatrixRMaj(numParam,1); |
− | this.d = new | + | this.d = new DMatrixRMaj(numParam,1); |
− | this.H = new | + | this.H = new DMatrixRMaj(numParam,numParam); |
− | this.negDelta = new | + | this.negDelta = new DMatrixRMaj(numParam,1); |
− | this.tempParam = new | + | this.tempParam = new DMatrixRMaj(numParam,1); |
− | this.A = new | + | this.A = new DMatrixRMaj(numParam,numParam); |
} | } | ||
Line 103: | Line 103: | ||
} | } | ||
− | public | + | public DMatrixRMaj getParameters() { |
return param; | return param; | ||
} | } | ||
Line 115: | Line 115: | ||
* @return true if it succeeded and false if it did not. | * @return true if it succeeded and false if it did not. | ||
*/ | */ | ||
− | public boolean optimize( | + | public boolean optimize( DMatrixRMaj initParam , |
− | + | DMatrixRMaj X , | |
− | + | DMatrixRMaj Y ) | |
{ | { | ||
configure(initParam,X,Y); | configure(initParam,X,Y); | ||
Line 138: | Line 138: | ||
* or it iterates too many times | * or it iterates too many times | ||
*/ | */ | ||
− | private boolean adjustParam( | + | private boolean adjustParam(DMatrixRMaj X, DMatrixRMaj Y, |
double prevCost) { | double prevCost) { | ||
// lambda adjusts how big of a step it takes | // lambda adjusts how big of a step it takes | ||
Line 186: | Line 186: | ||
* a matrix it will only declare new memory when needed. | * a matrix it will only declare new memory when needed. | ||
*/ | */ | ||
− | protected void configure( | + | protected void configure(DMatrixRMaj initParam , DMatrixRMaj X , DMatrixRMaj Y ) |
{ | { | ||
if( Y.getNumRows() != X.getNumRows() ) { | if( Y.getNumRows() != X.getNumRows() ) { | ||
Line 222: | Line 222: | ||
* H is an approximation of the hessian. | * H is an approximation of the hessian. | ||
*/ | */ | ||
− | private void computeDandH( | + | private void computeDandH(DMatrixRMaj param , DMatrixRMaj x , DMatrixRMaj y ) |
{ | { | ||
func.compute(param,x, tempDH); | func.compute(param,x, tempDH); | ||
Line 251: | Line 251: | ||
* where I is an identity matrix. | * where I is an identity matrix. | ||
*/ | */ | ||
− | private void computeA( | + | private void computeA(DMatrixRMaj A , DMatrixRMaj H , double lambda ) |
{ | { | ||
final int numParam = param.getNumElements(); | final int numParam = param.getNumElements(); | ||
Line 266: | Line 266: | ||
* cost = (1/N) Sum (f(x;p) - y)^2 | * cost = (1/N) Sum (f(x;p) - y)^2 | ||
*/ | */ | ||
− | private double cost( | + | private double cost(DMatrixRMaj param , DMatrixRMaj X , DMatrixRMaj Y) |
{ | { | ||
func.compute(param,X, temp0); | func.compute(param,X, temp0); | ||
Line 282: | Line 282: | ||
* @param deriv Where the jacobian will be stored | * @param deriv Where the jacobian will be stored | ||
*/ | */ | ||
− | protected void computeNumericalJacobian( | + | protected void computeNumericalJacobian( DMatrixRMaj param , |
− | + | DMatrixRMaj pt , | |
− | + | DMatrixRMaj deriv ) | |
{ | { | ||
double invDelta = 1.0/DELTA; | double invDelta = 1.0/DELTA; | ||
Line 315: | Line 315: | ||
* @param y the resulting output. | * @param y the resulting output. | ||
*/ | */ | ||
− | public void compute( | + | public void compute(DMatrixRMaj param , DMatrixRMaj x , DMatrixRMaj y ); |
} | } | ||
} | } | ||
</syntaxhighlight> | </syntaxhighlight> |
Revision as of 11:38, 18 May 2017
Levenberg-Marquardt is a popular non-linear optimization algorithm. This example demonstrates how a basic implementation of Levenberg-Marquardt can be created using EJML's procedural interface. Unnecessary allocation of new memory is avoided by reshaping matrices. When a matrix is reshaped, its width and height are changed but new memory is not declared unless the new shape requires more memory than is available.
The algorithm is provided a function, set of inputs, set of outputs, and an initial estimate of the parameters (this often works with all zeros). It finds the parameters that minimize the difference between the computed output and the observed output. A numerical Jacobian is used to estimate the function's gradient.
Note: This is a simple, straightforward implementation of Levenberg-Marquardt and is not as robust as Minpack's implementation. If you are looking for a robust non-linear least-squares minimization library in Java, check out DDogleg.
External Resources:
- LevenbergMarquardt.java code
- <disqus>Discuss this example</disqus>
Example Code
/**
 * <p>
 * This is a straightforward implementation of the Levenberg-Marquardt (LM) algorithm. LM is used to minimize
 * non-linear cost functions:<br>
 * <br>
 * S(P) = Sum{ i=1:m , [y<sub>i</sub> - f(x<sub>i</sub>,P)]<sup>2</sup>}<br>
 * <br>
 * where P is the set of parameters being optimized.
 * </p>
 *
 * <p>
 * In each iteration the parameters are updated using the following equations:<br>
 * <br>
 * P<sub>i+1</sub> = (H + &lambda; I)<sup>-1</sup> d <br>
 * d = (1/N) Sum{ i=1..N , (f(x<sub>i</sub>;P<sub>i</sub>) - y<sub>i</sub>) * jacobian(:,i) } <br>
 * H = (1/N) Sum{ i=1..N , jacobian(:,i) * jacobian(:,i)<sup>T</sup> }
 * </p>
 * <p>
 * Whenever possible the allocation of new memory is avoided. This is accomplished by reshaping matrices.
 * A matrix that is reshaped won't grow unless the new shape requires more memory than it has available.
 * </p>
 * @author Peter Abeles
 */
public class LevenbergMarquardt {
    // how much the numerical jacobian calculation perturbs the parameters by.
    // In better implementations there are better ways to compute this delta. See Numerical Recipes.
    private final static double DELTA = 1e-8;

    // hard cap on the number of LM iterations before giving up
    private static final int MAX_ITERATIONS = 20;
    // number of lambda adjustments attempted inside each iteration
    private static final int MAX_LAMBDA_ATTEMPTS = 5;
    // convergence test: stop once the cost improves by less than this between iterations
    private static final double CONVERGENCE_TOL = 1e-6;

    // initial value of the damping parameter lambda
    private double initialLambda;

    // the function that is optimized
    private Function func;

    // the optimized parameters and associated costs
    private DMatrixRMaj param;
    private double initialCost;
    private double finalCost;

    // used by matrix operations
    private DMatrixRMaj d;         // gradient-like term, see class javadoc
    private DMatrixRMaj H;         // approximate Hessian
    private DMatrixRMaj negDelta;  // negated parameter step
    private DMatrixRMaj tempParam; // candidate parameters being evaluated
    private DMatrixRMaj A;         // H + lambda*I

    // variables used by the numerical jacobian algorithm
    private DMatrixRMaj temp0;
    private DMatrixRMaj temp1;
    // used when computing d and H variables
    private DMatrixRMaj tempDH;
    // Where the numerical Jacobian is stored.
    private DMatrixRMaj jacobian;

    /**
     * Creates a new instance that uses the provided cost function.
     *
     * @param funcCost Cost function that is being optimized.
     */
    public LevenbergMarquardt( Function funcCost )
    {
        this.initialLambda = 1;

        // declare data to some initial small size. It will grow later on as needed.
        int maxElements = 1;
        int numParam = 1;

        this.temp0 = new DMatrixRMaj(maxElements,1);
        this.temp1 = new DMatrixRMaj(maxElements,1);
        this.tempDH = new DMatrixRMaj(maxElements,1);
        this.jacobian = new DMatrixRMaj(numParam,maxElements);

        this.func = funcCost;

        this.param = new DMatrixRMaj(numParam,1);
        this.d = new DMatrixRMaj(numParam,1);
        this.H = new DMatrixRMaj(numParam,numParam);
        this.negDelta = new DMatrixRMaj(numParam,1);
        this.tempParam = new DMatrixRMaj(numParam,1);
        this.A = new DMatrixRMaj(numParam,numParam);
    }

    /** @return cost of the initial parameters, valid after {@link #optimize} */
    public double getInitialCost() {
        return initialCost;
    }

    /** @return cost of the final parameters, or NaN if optimization failed */
    public double getFinalCost() {
        return finalCost;
    }

    /** @return the current best-fit parameters (internal storage, not a copy) */
    public DMatrixRMaj getParameters() {
        return param;
    }

    /**
     * Finds the best fit parameters.
     *
     * @param initParam The initial set of parameters for the function.
     * @param X The inputs to the function.
     * @param Y The "observed" output of the function
     * @return true if it succeeded and false if it did not.
     */
    public boolean optimize( DMatrixRMaj initParam ,
                             DMatrixRMaj X ,
                             DMatrixRMaj Y )
    {
        configure(initParam,X,Y);

        // save the cost of the initial parameters so that it knows if it improves or not
        initialCost = cost(param,X,Y);

        // iterate until the difference between the costs is insignificant
        // or it iterates too many times
        if( !adjustParam(X, Y, initialCost) ) {
            finalCost = Double.NaN;
            return false;
        }

        return true;
    }

    /**
     * Iterate until the difference between the costs is insignificant
     * or it iterates too many times.
     *
     * @return false if the linear solve for the step fails, true otherwise.
     */
    private boolean adjustParam(DMatrixRMaj X, DMatrixRMaj Y,
                                double prevCost) {
        // lambda adjusts how big of a step it takes
        double lambda = initialLambda;
        // the difference between the current and previous cost
        double difference = 1000;

        // BUG FIX: the original condition was "iter < 20 || difference < 1e-6", which
        // keeps looping past the iteration cap once the cost change becomes small.
        // The intent is to stop when EITHER the cap is hit OR the change is insignificant.
        for( int iter = 0; iter < MAX_ITERATIONS && difference > CONVERGENCE_TOL; iter++ ) {
            // compute some variables based on the gradient
            computeDandH(param,X,Y);

            // try various step sizes and see if any of them improve the
            // results over what has already been done
            boolean foundBetter = false;
            for( int i = 0; i < MAX_LAMBDA_ATTEMPTS; i++ ) {
                computeA(A,H,lambda);

                if( !solve(A,d,negDelta) ) {
                    return false;
                }
                // compute the candidate parameters
                subtract(param, negDelta, tempParam);

                double cost = cost(tempParam,X,Y);
                if( cost < prevCost ) {
                    // the candidate parameters produced better results so use it
                    foundBetter = true;
                    param.set(tempParam);
                    difference = prevCost - cost;
                    prevCost = cost;
                    // step worked: trust the Gauss-Newton direction more
                    lambda /= 10.0;
                } else {
                    // step failed: fall back towards gradient descent
                    lambda *= 10.0;
                }
            }

            // it reached a point where it can't improve so exit
            if( !foundBetter )
                break;
        }
        finalCost = prevCost;
        return true;
    }

    /**
     * Performs sanity checks on the input data and reshapes internal matrices. By reshaping
     * a matrix it will only declare new memory when needed.
     *
     * @throws IllegalArgumentException if X and Y differ in length or are not column vectors.
     */
    protected void configure(DMatrixRMaj initParam , DMatrixRMaj X , DMatrixRMaj Y )
    {
        if( Y.getNumRows() != X.getNumRows() ) {
            throw new IllegalArgumentException("Different vector lengths");
        } else if( Y.getNumCols() != 1 || X.getNumCols() != 1 ) {
            throw new IllegalArgumentException("Inputs must be a column vector");
        }

        int numParam = initParam.getNumElements();
        int numPoints = Y.getNumRows();

        if( param.getNumElements() != initParam.getNumElements() ) {
            // reshaping a matrix means that new memory is only declared when needed
            this.param.reshape(numParam,1, false);
            this.d.reshape(numParam,1, false);
            this.H.reshape(numParam,numParam, false);
            this.negDelta.reshape(numParam,1, false);
            this.tempParam.reshape(numParam,1, false);
            this.A.reshape(numParam,numParam, false);
        }

        param.set(initParam);

        // reshaping a matrix means that new memory is only declared when needed
        temp0.reshape(numPoints,1, false);
        temp1.reshape(numPoints,1, false);
        tempDH.reshape(numPoints,1, false);
        jacobian.reshape(numParam,numPoints, false);
    }

    /**
     * Computes the d and H parameters. Where d is the average error gradient and
     * H is an approximation of the hessian.
     */
    private void computeDandH(DMatrixRMaj param , DMatrixRMaj x , DMatrixRMaj y )
    {
        func.compute(param,x, tempDH);
        subtractEquals(tempDH, y);

        computeNumericalJacobian(param,x,jacobian);

        int numParam = param.getNumElements();
        int length = x.getNumElements();

        // d = average{ (f(x_i;p) - y_i) * jacobian(:,i) }
        for( int i = 0; i < numParam; i++ ) {
            double total = 0;
            for( int j = 0; j < length; j++ ) {
                total += tempDH.get(j,0)*jacobian.get(i,j);
            }
            d.set(i,0,total/length);
        }

        // compute the approximation of the hessian:  H = (1/N) * J * J'
        multTransB(jacobian,jacobian,H);
        scale(1.0/length,H);
    }

    /**
     * A = H + lambda*I <br>
     * <br>
     * where I is an identity matrix.
     */
    private void computeA(DMatrixRMaj A , DMatrixRMaj H , double lambda )
    {
        final int numParam = param.getNumElements();

        A.set(H);
        for( int i = 0; i < numParam; i++ ) {
            A.set(i,i, A.get(i,i) + lambda);
        }
    }

    /**
     * Computes the "cost" for the parameters given.
     *
     * cost = (1/N) Sum (f(x;p) - y)^2
     */
    private double cost(DMatrixRMaj param , DMatrixRMaj X , DMatrixRMaj Y)
    {
        func.compute(param,X, temp0);

        double error = diffNormF(temp0,Y);

        // use the accessor for consistency with the rest of the class
        return error*error / (double)X.getNumRows();
    }

    /**
     * Computes a simple numerical Jacobian via forward differences.
     *
     * NOTE: param is temporarily perturbed in place and then restored; the
     * add/subtract of DELTA can leave a tiny floating-point residue.
     *
     * @param param The set of parameters that the Jacobian is to be computed at.
     * @param pt The point around which the Jacobian is to be computed.
     * @param deriv Where the jacobian will be stored
     */
    protected void computeNumericalJacobian( DMatrixRMaj param ,
                                             DMatrixRMaj pt ,
                                             DMatrixRMaj deriv )
    {
        double invDelta = 1.0/DELTA;

        func.compute(param,pt, temp0);

        // compute the jacobian by perturbing the parameters slightly
        // then seeing how it effects the results.
        for( int i = 0; i < param.numRows; i++ ) {
            param.data[i] += DELTA;
            func.compute(param,pt, temp1);
            // compute the difference between the two parameters and divide by the delta
            add(invDelta,temp1,-invDelta,temp0,temp1);
            // copy the results into the jacobian matrix (row i = derivatives w.r.t. param i)
            System.arraycopy(temp1.data,0,deriv.data,i*pt.numRows,pt.numRows);

            param.data[i] -= DELTA;
        }
    }

    /**
     * The function that is being optimized.
     */
    public interface Function {
        /**
         * Computes the output for each value in matrix x given the set of parameters.
         *
         * @param param The parameter for the function.
         * @param x the input points.
         * @param y the resulting output.
         */
        public void compute(DMatrixRMaj param , DMatrixRMaj x , DMatrixRMaj y );
    }
}