曲线拟合三个耦合的ODE

时间:2015-03-11 15:35:33

标签: python curve-fitting ode

我有三个耦合的ODE,分别描述生物量的增长、底物的消耗和产物的形成。这3条曲线我都有实验数据。

我已经成功地用scipy.optimize中的curve_fit为每条曲线分别找到了最佳参数,但我找不到用同一组参数同时拟合全部3条曲线的方法。

在下面的代码中,我试图模仿一个类似问题(similar question)的解决方案,但那段代码针对的是更简单的情况,对我的问题不适用。

运行代码时,ODR报告说我的函数返回了形状错误的数组。由于该数学模型能够重现过程的特征,而且三种浓度我都有实验结果,我认为问题在于我不知道如何把数据正确地传给ODR。

如何修复我的代码?或者也许有更好的方法来曲线拟合我的模型?

from scipy import integrate
import numpy as np
from scipy.interpolate import Akima1DInterpolator
from operator import itemgetter
from scipy.odr import Model, Data, ODR

## The Model
def fukuda_solution(beta, t):
    """Integrate the Fukuda fermentation model and return the flattened solution.

    Parameters
    ----------
    beta : sequence of 9 floats
        Model parameters in the order
        (miu_max, Scr, Yxs, Yes, Yex, Ks, Ki, Aep, Aec).
    t : array_like
        Time points handed to ``integrate.odeint``.

    Returns
    -------
    numpy.ndarray
        1-D array holding the ``(len(t), 3)`` odeint solution (columns:
        biomass, substrate, ethanol) flattened row by row, i.e.
        ``[X(t0), S(t0), E(t0), X(t1), S(t1), E(t1), ...]``.
    """
    def fukuda(X, t, miu_max, Scr, Yxs, Yes, Yex, Ks, Ki, Aep, Aec):
        """Right-hand side: return [dX, dS, dE] for X = [biomass, substrate, ethanol]."""
        biomass   = X[0]
        substrate = X[1]
        ethanol   = X[2]
        W = 0.4 # not relevant in my case
        F = 0.0 # not relevant in my case (with F = 0 every F/W term below vanishes)
        Sf= 0.4 # not relevant in my case
        miu = miu_max * (substrate/(Ks+substrate)) * (Ki/(Ki+ethanol))

        # Ethanol is produced only while the substrate is at or above Scr ...
        if substrate >= Scr:
            Rep = Aep * (substrate - Scr)
        else:
            Rep = 0.0
        # ... and consumed only below Scr, provided some ethanol is left.
        if substrate >= Scr or ethanol <= 0:
            Rec = 0.0
        else:
            Rec = Aec * (Scr - substrate)

        dX = (miu + (Rec/Yex) - F/W) * biomass
        dS = -((miu/Yxs) + (Rep/Yes)) * biomass + (F/W)*(Sf-substrate)
        dE = (Rep-Rec) * biomass - (F/W)*ethanol

        return np.array([dX,dS,dE], dtype=float)

    miu_max, Scr, Yxs, Yes, Yex, Ks, Ki, Aep, Aec = beta
    X0 = np.array([0.85, 71.8, 3.57], dtype=float)  # initial concentrations: biomass, glucose, ethanol

    X_calculated = integrate.odeint(fukuda, X0, t, args=(miu_max, Scr, Yxs, Yes, Yex, Ks, Ki, Aep, Aec))

    # Debug output, written as single-argument calls so that it parses and
    # prints the same on both Python 2 and Python 3.
    print("Shape ravel")
    print(np.shape(X_calculated.ravel()))
    print("Shape X_calculated")
    print(np.shape(X_calculated))
    return X_calculated.ravel()


## Data
## Laboratory measurements.
## Biomass consumes the glucose. Ethanol is produced at first and is consumed
## later, once the glucose concentration has dropped.

## Glucose: sampling times and measured values (fewer samples than the other two series)
t_gly = np.array([0.0, 2.5, 8.0, 11.0, 14.0, 24.0], dtype=float)
a1_gly = np.array([71.8, 56.75, 9.74, 5.87, 2.57, 0.001], dtype=float)

## Biomass: sampling times and measured values
t_bio = np.array([0.0, 2.5, 5.0, 8.0, 11.0, 14.0, 18.0, 22.0, 24.0], dtype=float)
a1_bio = np.array([0.85, 2.24, 6.11, 9.41, 10.36, 11.32, 11.60, 11.5, 11.92], dtype=float)

## Ethanol: sampling times and measured values
t_eth = np.array([0.0, 2.5, 5.0, 8.0, 11.0, 14.0, 18.0, 22.0, 24.0], dtype=float)
a1_eth = np.array([3.57, 6.86, 20.49, 25.23, 19.83, 14.84, 9.56, 5.37, 3.57], dtype=float)


## Dense time grid on which the measurements are resampled.
t = np.arange(0, 24., 0.01)

## One Akima interpolator per measured series; evaluating each of them on the
## dense grid yields more data points for the curve fitting.
g_a1_bio = Akima1DInterpolator(t_bio, a1_bio)
g_a1_gly = Akima1DInterpolator(t_gly, a1_gly)
g_a1_eth = Akima1DInterpolator(t_eth, a1_eth)
fit_a1_bio, fit_a1_gly, fit_a1_eth = (
    interpolator(t) for interpolator in (g_a1_bio, g_a1_gly, g_a1_eth)
)

## Starting values for the 9 parameters of the coupled ODEs,
## found by experimenting with the parameters by hand.
guess1 = [0.4575, 0.5, 0.36, 0.45, 6.3599, 1.543, 19.694, 0.02, 0.23]

我怀疑问题就出在下面这段代码里,但我不确定。

## Preparing experimental findings for ODR
## scipy.odr fits several responses at once when y has shape (q, n) for an x of
## length n; the model function must then return a (q, n) array as well.
## Here q = 3 (biomass, glucose, ethanol), so the time axis is passed once
## instead of being repeated three times: repeating it made the model return
## 9 * len(t) values for 3 * len(t) observations.
data_time = t
data_experiments = np.array([fit_a1_bio,fit_a1_gly,fit_a1_eth])
data = Data(data_time, data_experiments)


def fukuda_multiresponse(beta, t):
    """Return the fukuda_solution output as a (3, len(t)) array, one row per concentration."""
    # fukuda_solution yields [X(t0), S(t0), E(t0), X(t1), ...]; regroup the
    # triples into rows and transpose so the rows line up with data_experiments.
    return fukuda_solution(beta, t).reshape(-1, 3).T


model = Model(fukuda_multiresponse)

此处代码失败:

## Run the fit. Fit type 2 selects ordinary least squares instead of
## orthogonal distance regression.
odr = ODR(data, model, guess1)
odr.set_job(2)
out = odr.run()
out.pprint()
## Single-argument print calls behave the same on Python 2 and Python 3.
print(out.beta)
print(out.sd_beta)

由于中间插入了说明文字,我的代码在这里被分成了多个块,但它实际上是同一个文件。

1 个答案:

答案 0 :(得分:0)

我现在正在回答我自己的问题。问题是我在解决错误的问题。由于我有三条曲线,这不是曲线拟合问题。这是基本参数识别(或参数估计)问题。

这是解决类似问题的方法:

  1. 实现一个自定义的适应度(目标)函数,用来衡量模型预测与数据的拟合程度。我的做法是计算所有浓度的均方误差(mean squared error)之和。拟合良好时它返回较小的值,拟合较差时返回较大的值。
  2. 使用全局优化器,通过最小化目标函数来寻找最佳参数集。我使用的是scipy.optimize中的basinhopping。
  3. 下面是最小的可运行示例/解决方案。第一个文件'ethanol.py'是主程序;第二个文件'ethanol_model.py'由第一个文件导入,包含模型、目标函数和三个实用函数。Fukuda是该模型的名称。

    **ethanol.py**

    import numpy as np
    import matplotlib.pyplot as plt
    
    from ethanol_model import fukuda_solution
    from ethanol_model import fukuda_fit
    
    from scipy.optimize import basinhopping
    from copy import deepcopy
    
    ## Measurements data
    # Biomass data
    data_X = np.array([0.85, 2.24, 6.11, 9.41, 10.36, 11.32, 11.6, 11.5, 11.92], dtype=float)
    # Substrate data with some missing points (np.nan marks a missing measurement)
    data_S = np.array([  8.44800000e+01, 5.67500000e+01,  np.nan, 9.38000000e+00, 5.51000000e+00, 2.21000000e+00, np.nan, np.nan, 1.00000000e-04], dtype=float)
    # Product data
    data_E = np.array([  3.57, 6.86, 20.49, 25.23, 19.83, 14.84, 9.56, 5.37, 3.57], dtype=float)
    ## time points when measurements were taken
    data_t = np.array([  0., 2.5, 5., 8., 11., 14., 18., 22., 24. ], dtype=float)
    
    # This is a good initial guess, but it works with bad guess also
    guess = [0.4575, 0.5, 0.36, 0.45, 6.3599, 1.543, 19.694, 0.02, 0.23]
    parameters = deepcopy(guess)
    
    # Global search: basinhopping runs a local BFGS minimisation of fukuda_fit
    # from each of its trial points.
    minimizer_kwargs = {"method": "BFGS", "args": (data_X, data_S, data_E, data_t)}
    ret = basinhopping(fukuda_fit, parameters, minimizer_kwargs=minimizer_kwargs, niter=200)
    print(ret)
    # Keep the optimised parameters. The original assigned them to a misspelled
    # name, so the fitness and the plot below were computed from the initial guess.
    parameters = ret.x
    
    retfun = fukuda_fit(parameters, data_X, data_S, data_E, data_t)
    # Single-argument print call: same output on Python 2 and Python 3.
    print("Fitness2: {0}".format(retfun))
    
    ## The rest is only for visual plotting
    t = np.arange(0, 24., 0.01)
    
    results = fukuda_solution(parameters, t, start_X=data_X[0], start_S = data_S[0], start_E=data_E[0])
    biomass, glycose, ethanol = results
    plt.plot(t, biomass, 'g-', label='Biomass', linewidth=1)
    plt.plot(t, glycose, 'y-', label='Substrate', linewidth=1)
    plt.plot(t, ethanol, 'b-', label='Ethanol', linewidth=1)
    plt.plot(data_t, data_X, 'go', label='Biomass data')
    plt.plot(data_t, data_S, 'ys', label='Substrate data')
    plt.plot(data_t, data_E, 'b^', label='Ethanol data')
    plt.legend(loc='best')
    plt.show()
    print("-------------------------")
    

    **ethanol_model.py**

    from scipy import integrate
    import numpy as np
    
    def fukuda_solution(beta, t, **kwargs):
        """Integrate the Fukuda model over ``t`` and return one row per concentration.

        ``beta`` holds the nine model parameters in the order
        (miu_max, Scr, Yxs, Yes, Yex, Ks, Ki, Aep, Aec). The initial state is
        read from the required keyword arguments ``start_X``, ``start_S`` and
        ``start_E``; a missing one raises ``KeyError``. The odeint solution is
        returned transposed, with shape ``(3, len(t))``: the rows are biomass,
        substrate and ethanol.
        """
        floor = 1e-08  # a concentration must exceed this for its derivative to be non-zero

        def fukuda(X, t, miu_max, Scr, Yxs, Yes, Yex, Ks, Ki, Aep, Aec):
            """Return [dX, dS, dE] for the state X = [biomass, substrate, ethanol]."""
            biomass, substrate, ethanol = X[0], X[1], X[2]

            miu = miu_max * (substrate/(Ks+substrate)) * (Ki/(Ki+ethanol))

            # Ethanol is produced while the substrate is at or above Scr and
            # consumed below it, as long as some ethanol is left.
            if substrate >= Scr:
                Rep = Aep * (substrate - Scr)
                Rec = 0.0
            else:
                Rep = 0.0
                if ethanol <= 0:
                    Rec = 0.0
                else:
                    Rec = Aec * (Scr - substrate)

            # Concentrations are artificially kept positive: a species at or
            # below the floor gets a zero derivative.
            dS = dX = dE = 0
            if substrate > floor:
                dS = -((miu/Yxs) + (Rep/Yes)) * biomass
            if biomass > floor:
                dX = (miu + (Rec/Yex)) * biomass
            if ethanol > floor:
                dE = (Rep-Rec) * biomass

            return np.array([dX,dS,dE], dtype=float)

        miu_max, Scr, Yxs, Yes, Yex, Ks, Ki, Aep, Aec = beta
        rate_params = (miu_max, Scr, Yxs, Yes, Yex, Ks, Ki, Aep, Aec)
        initial_state = np.array(
            [kwargs[key] for key in ('start_X', 'start_S', 'start_E')], dtype=float)
        trajectory = integrate.odeint(fukuda, initial_state, t, args=rate_params)
        return trajectory.T
    
    
    def gmax(*args):
        """Return the largest element found across all of the given sequences."""
        # Largest element of each sequence first, then the largest of those.
        return max([max(seq) for seq in args])
    
    def gmin(*args):
        """Return the smallest element found across all of the given sequences."""
        # Smallest element of each sequence first, then the smallest of those.
        return min([min(seq) for seq in args])
    
    def resc(a,r): # a is array, r is related array
        """Normalize (rescale) ``a`` using the combined range of ``a`` and ``r``.

        Every element of ``a`` is mapped to ``(x - low) / (high - low)``, where
        ``low`` and ``high`` are the smallest and largest values found in
        ``a`` and ``r`` together. Rescaling two related arrays against each
        other therefore puts both on the same 0..1 scale.

        Parameters
        ----------
        a : array_like
            Values to rescale.
        r : array_like
            Related values that contribute to the range only.

        Returns
        -------
        numpy.ndarray
            Float array with one rescaled value per element of ``a``.

        Raises
        ------
        ValueError
            If ``a`` or ``r`` is empty.

        Notes
        -----
        If all values of ``a`` and ``r`` are equal, the range is zero and the
        result is NaN.
        """
        # Convert to float so integer input is never integer-divided.
        values = np.asarray(a, dtype=float)
        related = np.asarray(r, dtype=float)
        # The bounds are the same for every element: compute them once instead
        # of once per element.
        low = min(values.min(), related.min())
        high = max(values.max(), related.max())
        return (values - low) / (high - low)
    
    
    
    def fukuda_fit(beta, data_X, data_S, data_E, data_t):
        """Objective function for the Fukuda model: mean squared rescaled error.

        The model is integrated at the measurement times ``data_t``, starting
        from the first entry of each data series. For every series the NaN
        measurements are dropped together with the model values at the same
        positions, model and data are rescaled against each other with
        ``resc``, and the squared differences are summed. The three sums are
        added and divided by the total number of measurements that were kept.
        """
        simulated = fukuda_solution(
            beta, data_t, start_X=data_X[0], start_S=data_S[0], start_E=data_E[0])

        def series_error(predicted, measured):
            """Return (sum of squared rescaled differences, number of usable points)."""
            # remove NaN values from the data and the matching model values
            usable = ~np.isnan(measured)
            measured = np.compress(usable, measured)
            predicted = np.compress(usable, predicted)
            gap = resc(predicted, measured) - resc(measured, predicted)
            return np.sum(np.power(gap, 2)), len(measured)

        # Rows of the solution: biomass, substrate, ethanol.
        err_X, n_X = series_error(simulated[0], data_X)
        err_S, n_S = series_error(simulated[1], data_S)
        err_E, n_E = series_error(simulated[2], data_E)

        return 1./(n_X + n_S + n_E) * (err_X + err_S + err_E)
    

    是的,在解决这些类型的问题时,插值是完全没有必要的。