我正在为基于scipy的optimize.leastsq的2D数据编写一个自动曲线拟合程序,它可以工作。然而,当添加许多曲线并且起始值略微偏离时,我得到非物理结果(例如负幅度)。
我发现这篇文章Scipy: bounds for fitting parameter(s) when using optimize.leastsq,并试图根据Cern的Minuit使用参数转换。在上面提到的问题中,有人提供了一些python代码的链接。
code.google.com/p/nmrglue/source/browse/trunk/nmrglue/analysis/leastsqbound.py
我写了这个最小的工作示例(扩展代码)
"""
http://code.google.com/p/nmrglue/source/browse/trunk/nmrglue/analysis/leastsqbound.py
Constrained multivariate Levenberg-Marquardt optimization
"""
from scipy.optimize import leastsq
import numpy as np
import matplotlib.pyplot as plt #new
def internal2external_grad(xi, bounds):
"""
Calculate the internal to external gradiant
Calculates the partial of external over internal
"""
ge = np.empty_like(xi)
for i, (v, bound) in enumerate(zip(xi, bounds)):
a = bound[0] # minimum
b = bound[1] # maximum
if a == None and b == None: # No constraints
ge[i] = 1.0
elif b == None: # only min
ge[i] = v / np.sqrt(v ** 2 + 1)
elif a == None: # only max
ge[i] = -v / np.sqrt(v ** 2 + 1)
else: # both min and max
ge[i] = (b - a) * np.cos(v) / 2.
return ge
def i2e_cov_x(xi, bounds, cov_x):
grad = internal2external_grad(xi, bounds)
grad = grad = np.atleast_2d(grad)
return np.dot(grad.T, grad) * cov_x
def internal2external(xi, bounds):
""" Convert a series of internal variables to external variables"""
xe = np.empty_like(xi)
for i, (v, bound) in enumerate(zip(xi, bounds)):
a = bound[0] # minimum
b = bound[1] # maximum
if a == None and b == None: # No constraints
xe[i] = v
elif b == None: # only min
xe[i] = a - 1. + np.sqrt(v ** 2. + 1.)
elif a == None: # only max
xe[i] = b + 1. - np.sqrt(v ** 2. + 1.)
else: # both min and max
xe[i] = a + ((b - a) / 2.) * (np.sin(v) + 1.)
return xe
def external2internal(xe, bounds):
""" Convert a series of external variables to internal variables"""
xi = np.empty_like(xe)
for i, (v, bound) in enumerate(zip(xe, bounds)):
a = bound[0] # minimum
b = bound[1] # maximum
if a == None and b == None: # No constraints
xi[i] = v
elif b == None: # only min
xi[i] = np.sqrt((v - a + 1.) ** 2. - 1)
elif a == None: # only max
xi[i] = np.sqrt((b - v + 1.) ** 2. - 1)
else: # both min and max
xi[i] = np.arcsin((2.*(v - a) / (b - a)) - 1.)
return xi
def err(p, bounds, efunc, args):
pe = internal2external(p, bounds) # convert to external variables
return efunc(pe, *args)
def calc_cov_x(infodic, p):
"""
Calculate cov_x from fjac, ipvt and p as is done in leastsq
"""
fjac = infodic['fjac']
ipvt = infodic['ipvt']
n = len(p)
# adapted from leastsq function in scipy/optimize/minpack.py
perm = np.take(np.eye(n), ipvt - 1, 0)
r = np.triu(np.transpose(fjac)[:n, :])
R = np.dot(r, perm)
try:
cov_x = np.linalg.inv(np.dot(np.transpose(R), R))
except LinAlgError:
cov_x = None
return cov_x
def leastsqbound(func, x0, bounds, args = (), **kw):
"""
Constrained multivariant Levenberg-Marquard optimization
Minimize the sum of squares of a given function using the
Levenberg-Marquard algorithm. Contraints on parameters are inforced using
variable transformations as described in the MINUIT User's Guide by
Fred James and Matthias Winkler.
Parameters:
* func functions to call for optimization.
* x0 Starting estimate for the minimization.
* bounds (min,max) pair for each element of x, defining the bounds on
that parameter. Use None for one of min or max when there is
no bound in that direction.
* args Any extra arguments to func are places in this tuple.
Returns: (x,{cov_x,infodict,mesg},ier)
Return is described in the scipy.optimize.leastsq function. x and con_v
are corrected to take into account the parameter transformation, infodic
is not corrected.
Additional keyword arguments are passed directly to the
scipy.optimize.leastsq algorithm.
"""
# check for full output
if "full_output" in kw and kw["full_output"]:
full = True
else:
full = False
# convert x0 to internal variables
i0 = external2internal(x0, bounds)
# perfrom unconstrained optimization using internal variables
r = leastsq(err, i0, args = (bounds, func, args), **kw)
# unpack return convert to external variables and return
if full:
xi, cov_xi, infodic, mesg, ier = r
xe = internal2external(xi, bounds)
cov_xe = i2e_cov_x(xi, bounds, cov_xi)
# XXX correct infodic 'fjac','ipvt', and 'qtf'
return xe, cov_xe, infodic, mesg, ier
else:
xi, ier = r
xe = internal2external(xi, bounds)
return xe, ier
# new below
def _evaluate(x, p):
'''
Linear plus Lorentzian curve
p = list with three parameters ([a, b, I, Pos, FWHM])
'''
return p[0] + p[1] * x + p[2] / (1 + np.power((x - p[3]) / (p[4] / 2), 2))
def residuals(p, y, x):
err = _evaluate(x, p) - y
return err
if __name__ == '__main__':
data = np.loadtxt('constraint.dat') # read data
p0 = [5000., 0., 500., 2450., 3] #Start values for a, b, I, Pos, FWHM
constraints = [(4000., None), (-50., 20.), (0., 2000.), (2400., 2451.), (None, None)]
p, res = leastsqbound(residuals, p0, constraints, args = (data[:, 1], data[:, 0]), maxfev = 20000)
print p, res
plt.plot(data[:, 0], data[:, 1]) # plot data
plt.plot(data[:, 0], _evaluate(data[:, 0], p0)) # plot start values
plt.plot(data[:, 0], _evaluate(data[:, 0], p)) # plot fit values
plt.show()
这是情节输出,其中绿色是起始条件,红色是拟合结果:
这是正确的用法吗?如果超出边界,External2内部转换只会抛出一个nan。 leastsq似乎能够处理这个?
我上传了拟合数据here。只需粘贴到名为constraint.dat的文本文件中。
答案 0 :(得分:0)
已经存在受欢迎的受限Lev-Mar代码
http://adsabs.harvard.edu/abs/2009ASPC..411..251M
在python中实现
http://code.google.com/p/astrolibpy/source/browse/mpfit/mpfit.py
我建议不要重新发明轮子。
答案 1 :(得分:0)
按照sega_sai的回答,我使用mpfit.py
提出了这个最小的工作示例import matplotlib.pyplot as plt
from mpfit import mpfit
import numpy as np
def _evaluate(p, x):
'''
Linear plus Lorentzian curve
p = list with three parameters ([a, b, I, Pos, FWHM])
'''
return p[0] + p[1] * x + p[2] / (1 + np.power((x - p[3]) / (p[4] / 2), 2))
def residuals(p, fjac = None, x = None, y = None, err = None):
status = 0
error = _evaluate(p, x) - y
return [status, error / err]
if __name__ == '__main__':
data = np.loadtxt('constraint.dat') # read data
x = data[:, 0]
y = data[:, 1]
err = 0 * np.ones(y.shape, dtype = 'float64')
parinfo = [{'value':5000., 'fixed':0, 'limited':[0, 0], 'limits':[0., 0.], 'parname':'a'},
{'value':0., 'fixed':0, 'limited':[0, 0], 'limits':[0., 0.], 'parname':'b'},
{'value':500., 'fixed':0, 'limited':[0, 0], 'limits':[0., 0.], 'parname':'I'},
{'value':2450., 'fixed':0, 'limited':[0, 0], 'limits':[0., 0.], 'parname':'Pos'},
{'value':3., 'fixed':0, 'limited':[0, 0], 'limits':[0., 0.], 'parname':'FWHM'}]
fa = {'x':x, 'y':y, 'err':err}
m = mpfit(residuals, parinfo = parinfo, functkw = fa)
print m
拟合结果如下:
mpfit.py: 3714.97545, 0.484193283, 2644.47271, 2440.13385, 22.1898496
leastsq: 3714.97187, 0.484194545, 2644.46890, 2440.13391, 22.1899295
结论:两种方法都有效,都允许约束。但是因为mpfit来自一个非常成熟的来源我更信任它。如果可用的话,它还会尊重错误值。
答案 2 :(得分:0)
尝试lmfit-py - https://github.com/newville/lmfit-py
它还通过scipy.optimize.leastsq使用Levenberg-Marquardt(LM)算法。不确定的就好了。
它不仅可以使用边界限制拟合参数,还可以在不修改拟合函数的情况下使用数学表达式约束拟合参数。
忘记在拟合函数中使用那些可怕的p [0],p [1] ....只需通过Parameters类使用拟合参数的名称。