我有三个偶联的ODE,描述了生物量的增长,底物消耗和产物形成。我对所有3条曲线都有实验结果。
我已成功使用scipy.optimize中的curve_fit分别为每条曲线找到最佳参数,但我找不到适合所有3条曲线的方法。
在代码中,我试图模仿来自similar question的解决方案,但该代码用于更简单的情况并且对我不起作用。
当我运行我的代码时,ODR告诉我的函数返回一个错误形状的数组。由于数学模型能够重现过程的特征,并且我有所有3种浓度的实验结果,我相信我不知道如何将我的数据输入ODR。
如何修复我的代码?或者也许有更好的方法来曲线拟合我的模型?
from scipy import integrate
import numpy as np
from scipy.interpolate import Akima1DInterpolator
from operator import itemgetter
from scipy.odr import Model, Data, ODR
## The Model
def fukuda_solution(beta, t):
def fukuda(X, t, miu_max, Scr, Yxs, Yes, Yex, Ks, Ki, Aep, Aec):
biomass = X[0]
substrate = X[1]
ethanol = X[2]
W = 0.4 # not relevant in my case
F = 0.0 # not relevant in my case
Sf= 0.4 # not relevant in my case
miu = miu_max * (substrate/(Ks+substrate)) * (Ki/(Ki+ethanol))
if substrate >= Scr:
Rep = Aep * (substrate - Scr)
else:
Rep = 0.0
if substrate >= Scr or ethanol <= 0:
Rec = 0.0
else:
Rec = Aec * (Scr - substrate)
dX = (miu + (Rec/Yex) - F/W) * biomass
dS = -((miu/Yxs) + (Rep/Yes)) * biomass + (F/W)*(Sf-substrate)
dE = (Rep-Rec) * biomass - (F/W)*ethanol
return np.array([dX,dS,dE], dtype=float)
miu_max, Scr, Yxs, Yes, Yex, Ks, Ki, Aep, Aec = beta
X0 = np.array([0.85, 71.8, 3.57], dtype=float) # initial concentrations: biomass, glucose, ethanol
# X_calculated, infodict = integrate.odeint(fukuda, X0, t, args=(miu_max, Scr, Yxs, Yes, Yex, Ks, Ki, Aep, Aec), full_output=True)
X_calculated = integrate.odeint(fukuda, X0, t, args=(miu_max, Scr, Yxs, Yes, Yex, Ks, Ki, Aep, Aec))
print "Shape ravel"
print np.shape(X_calculated.ravel())
print "Shape X_calculated"
print np.shape(X_calculated)
return X_calculated.ravel()
## Data
## Measurements from lab. experiments
## The glucose is consumed by biomass. The ethanol is produced, but is later consumed
## when glucose concentration decreases,
## Glucose
t_gly = np.array([0. , 2.5, 8, 11, 14, 24], dtype=float)
a1_gly= np.array([71.8, 56.75, 9.74, 5.87, 2.57, 0.001], dtype=float)
## Biomass
t_bio = np.array([0.0 , 2.5, 5.0, 8.0, 11.0, 14.0, 18.0, 22.0, 24.0], dtype=float)
a1_bio= np.array([0.85, 2.24, 6.11, 9.41, 10.36, 11.32, 11.60, 11.5, 11.92], dtype=float)
## Ethanol
t_eth = np.array([0.0 , 2.5, 5.0, 8.0, 11.0, 14.0, 18.0, 22.0, 24.0], dtype=float)
a1_eth= np.array([3.57, 6.86, 20.49, 25.23, 19.83, 14.84, 9.56, 5.37, 3.57], dtype=float)
t = np.arange(0, 24., 0.01)
## Here I interpolate data from experiments.
## I just need more data points for curve fitting.
g_a1_bio = Akima1DInterpolator(t_bio,a1_bio)
fit_a1_bio = g_a1_bio(t)
g_a1_gly = Akima1DInterpolator(t_gly,a1_gly)
fit_a1_gly = g_a1_gly(t)
g_a1_eth = Akima1DInterpolator(t_eth,a1_eth)
fit_a1_eth = g_a1_eth(t)
## My coupled ODEs have 9 parameters.
## This is the best guess I could get by manually experimenting with parameters.
guess1 = [0.4575, 0.5, 0.36, 0.45, 6.3599, 1.543, 19.694, 0.02, 0.23]
我希望问题隐藏在某处,但我不确定。
## Preparing experimental findings for ODR
data_time = np.repeat(t, 3)
data_experiments = np.array([fit_a1_bio,fit_a1_gly,fit_a1_eth])
data = Data(data_time, data_experiments.ravel())
model = Model(fukuda_solution)
此处代码失败:
odr = ODR(data, model, guess1)
odr.set_job(2)
out = odr.run()
out.pprint()
print out.beta
print out.sd_beta
由于附加注释,我的代码在这里出现在多个块中,但它实际上是一个文件。
答案 0 :(得分:0)
我现在正在回答我自己的问题。问题是我在解决错误的问题。由于我有三条曲线,这不是曲线拟合问题。这是基本参数识别(或参数估计)问题。
这是解决类似问题的方法:
basinhopping
中的scipy.optimize
。这是最小的工作示例/解决方案。第一个文件'ethanol.py'是主程序,第二个文件'ethanol_model.py'由第一个文件导入,包含模型,目标函数和三个实用函数。 福田是模特
<强> ethanol.py 强>
import numpy as np
import matplotlib.pyplot as plt
from ethanol_model import fukuda_solution
from ethanol_model import fukuda_fit
from scipy.optimize import basinhopping
from copy import deepcopy
## Measurements data
# Biomass data
data_X = np.array([0.85, 2.24, 6.11, 9.41, 10.36, 11.32, 11.6, 11.5, 11.92], dtype=float)
# Substrate data with some missing points
data_S = np.array([ 8.44800000e+01, 5.67500000e+01, np.nan, 9.38000000e+00, 5.51000000e+00, 2.21000000e+00, np.nan, np.nan, 1.00000000e-04], dtype=float)
# Product data
data_E = np.array([ 3.57, 6.86, 20.49, 25.23, 19.83, 14.84, 9.56, 5.37, 3.57], dtype=float)
## time points when measurements were taken
data_t = np.array([ 0., 2.5, 5., 8., 11., 14., 18., 22., 24. ], dtype=float)
# This is a good initial guess, but it works with bad guess also
guess = [0.4575, 0.5, 0.36, 0.45, 6.3599, 1.543, 19.694, 0.02, 0.23]
parameters = deepcopy(guess)
minimizer_kwargs = {"method": "BFGS", "args": (data_X, data_S, data_E, data_t)}
ret = basinhopping(fukuda_fit, parameters, minimizer_kwargs=minimizer_kwargs, niter=200)
print(ret)
paramaters = ret.x
retfun = fukuda_fit(parameters, data_X, data_S, data_E, data_t)
print "Fitness2: {0}". format(retfun)
## The rest is only for visual plotting
t = np.arange(0, 24., 0.01)
results = fukuda_solution(parameters, t, start_X=data_X[0], start_S = data_S[0], start_E=data_E[0])
biomass, glycose, ethanol = results
plt.plot(t, biomass, 'g-', label='Biomass', linewidth=1)
plt.plot(t, glycose, 'y-', label='Substrate', linewidth=1)
plt.plot(t, ethanol, 'b-', label='Ethanol', linewidth=1)
plt.plot(data_t, data_X, 'go', label='Biomass data')
plt.plot(data_t, data_S, 'ys', label='Substrate data')
plt.plot(data_t, data_E, 'b^', label='Ethanol data')
plt.legend(loc='best')
plt.show()
print("-------------------------")
<强> ethanol_model.py 强>
from scipy import integrate
import numpy as np
def fukuda_solution(beta, t, **kwargs):
def fukuda(X, t, miu_max, Scr, Yxs, Yes, Yex, Ks, Ki, Aep, Aec):
biomass = X[0]
substrate = X[1]
ethanol = X[2]
very_low = 1e-08 # very small number
miu = miu_max * (substrate/(Ks+substrate)) * (Ki/(Ki+ethanol))
Rep = (Aep * (substrate - Scr) if substrate >= Scr else 0.0 )
Rec = (0.0 if substrate >= Scr or ethanol <= 0 else Aec * (Scr - substrate))
# here I'm artificially limiting concentrations to positive values
dS = (-((miu/Yxs) + (Rep/Yes)) * biomass if substrate > very_low else 0)
dX = ((miu + (Rec/Yex)) * biomass if biomass > very_low else 0)
dE = ((Rep-Rec) * biomass if ethanol > very_low else 0)
return np.array([dX,dS,dE], dtype=float)
miu_max, Scr, Yxs, Yes, Yex, Ks, Ki, Aep, Aec = beta
X0 = np.array([kwargs['start_X'], kwargs['start_S'], kwargs['start_E']], dtype=float)
X_calculated = integrate.odeint(fukuda, X0, t, args=(miu_max, Scr, Yxs, Yes, Yex, Ks, Ki, Aep, Aec))
return np.transpose(X_calculated)
def gmax(*args):
"""Find max in many arrays"""
lmax = []
for l in args:
lmax.append(max(l))
return max(lmax)
def gmin(*args):
"""Find min in many arrays"""
lmin = []
for l in args:
lmin.append(min(l))
return min(lmin)
def resc(a,r): # a is array, r is related array
"""Normalize (rescale) an array"""
b = []
for i in a:
b.append( (i-gmin(a,r))/(gmax(a,r) - gmin(a,r)) )
return np.hstack(b)
def fukuda_fit(beta, data_X, data_S, data_E, data_t):
"""Objective function for Fukuda model"""
start_X = data_X[0]
start_S = data_S[0]
start_E = data_E[0]
result = fukuda_solution(beta,data_t, start_X=start_X, start_S=start_S, start_E=start_E)
X = result[0]
S = result[1]
E = result[2]
# remove NaN values
nans = np.isnan(data_X)
data_X = np.compress(~nans,data_X)
X = np.compress(~nans,X)
nans = np.isnan(data_S)
data_S = np.compress(~nans,data_S)
S = np.compress(~nans,S)
nans = np.isnan(data_E)
data_E = np.compress(~nans,data_E)
E = np.compress(~nans,E)
residX = np.sum( np.power(resc(X,data_X)-resc(data_X,X), 2) )
residS = np.sum( np.power(resc(S,data_S)-resc(data_S,S), 2) )
residE = np.sum( np.power(resc(E,data_E)-resc(data_E,E), 2) )
resid = residX + residS + residE
return 1./(len(data_X) + len(data_S) + len(data_E)) * resid
是的,在解决这些类型的问题时,插值是完全没有必要的。