Question

我有三个偶联的ODE，描述了生物量的增长，底物消耗和产物形成。我对所有3条曲线都有实验结果。

我已成功使用scipy.optimize中的curve_fit分别为每条曲线找到最佳参数，但我找不到适合所有3条曲线的方法。

在代码中，我试图模仿来自similar question的解决方案，但该代码用于更简单的情况并且对我不起作用。

当我运行代码时，ODR 报告我的函数返回了一个形状错误的数组。数学模型本身能够重现该过程的特征，而且三种浓度的实验结果我都有，所以我认为问题在于我不知道如何正确地把数据传给 ODR。

如何修复我的代码？或者也许有更好的方法来曲线拟合我的模型？

from scipy import integrate
import numpy as np
from scipy.interpolate import Akima1DInterpolator
from operator import itemgetter
from scipy.odr import Model, Data, ODR

## The Model
def fukuda_solution(beta, t):
    """Integrate the three coupled ODEs (biomass, substrate, ethanol).

    Parameters
    ----------
    beta : sequence of 9 numbers
        Model parameters in the order
        (miu_max, Scr, Yxs, Yes, Yex, Ks, Ki, Aep, Aec).
    t : sequence of numbers
        Time points handed to ``integrate.odeint``; the first entry is the
        time of the initial condition.

    Returns
    -------
    numpy.ndarray
        The ``odeint`` result (one row per time point, columns biomass,
        substrate, ethanol) flattened with ``ravel()``.  The flattening is
        row-major, so the output is ordered
        ``[X(t0), S(t0), E(t0), X(t1), S(t1), E(t1), ...]`` and holds
        ``3 * len(t)`` values.

    Notes
    -----
    The initial concentrations are hard-coded below, and the shapes of the
    result are printed on every call (debug output kept from the original).
    """
    def fukuda(X, t, miu_max, Scr, Yxs, Yes, Yex, Ks, Ki, Aep, Aec):
        """Right-hand side of the ODE system, in the form odeint expects."""
        biomass   = X[0]
        substrate = X[1]
        ethanol   = X[2]
        W = 0.4 # not relevant in my case
        F = 0.0 # not relevant in my case (F == 0 cancels every F/W term)
        Sf= 0.4 # not relevant in my case
        # Specific growth rate: substrate-limited, inhibited by ethanol.
        miu = miu_max * (substrate/(Ks+substrate)) * (Ki/(Ki+ethanol))

        # Ethanol is produced only while substrate is at or above Scr ...
        if substrate >= Scr:
            Rep = Aep * (substrate - Scr)
        else:
            Rep = 0.0
        # ... and consumed only below Scr, as long as some ethanol is left.
        if substrate >= Scr or ethanol <= 0:
            Rec = 0.0
        else:
            Rec = Aec * (Scr - substrate)

        dX = (miu + (Rec/Yex) - F/W) * biomass
        dS = -((miu/Yxs) + (Rep/Yes)) * biomass + (F/W)*(Sf-substrate)
        dE = (Rep-Rec) * biomass - (F/W)*ethanol

        return np.array([dX,dS,dE], dtype=float)

    # Unpacking doubles as a length check: anything but 9 values raises.
    miu_max, Scr, Yxs, Yes, Yex, Ks, Ki, Aep, Aec = beta
    X0 = np.array([0.85, 71.8, 3.57], dtype=float)  # initial concentrations: biomass, glucose, ethanol

    X_calculated = integrate.odeint(fukuda, X0, t, args=(miu_max, Scr, Yxs, Yes, Yex, Ks, Ki, Aep, Aec))

    # print(...) with a single argument prints the same text on Python 2
    # and Python 3; the bare print statement only parses on Python 2.
    print("Shape ravel")
    print(np.shape(X_calculated.ravel()))
    print("Shape X_calculated")
    print(np.shape(X_calculated))
    return X_calculated.ravel()


## Data
## Laboratory measurements. Glucose is consumed by the biomass. Ethanol is
## produced first and consumed later, once the glucose concentration drops.

## Glucose: sampling times and measured concentrations
t_gly = np.array([0.0, 2.5, 8.0, 11.0, 14.0, 24.0], dtype=float)
a1_gly = np.array([71.8, 56.75, 9.74, 5.87, 2.57, 0.001], dtype=float)

## Biomass: sampling times and measured concentrations
t_bio = np.array([0.0, 2.5, 5.0, 8.0, 11.0, 14.0, 18.0, 22.0, 24.0], dtype=float)
a1_bio = np.array([0.85, 2.24, 6.11, 9.41, 10.36, 11.32, 11.60, 11.5, 11.92], dtype=float)

## Ethanol: sampling times and measured concentrations
t_eth = np.array([0.0, 2.5, 5.0, 8.0, 11.0, 14.0, 18.0, 22.0, 24.0], dtype=float)
a1_eth = np.array([3.57, 6.86, 20.49, 25.23, 19.83, 14.84, 9.56, 5.37, 3.57], dtype=float)


## Dense, evenly spaced time grid used for the interpolated curves.
t = np.arange(0, 24., 0.01)

## Akima interpolants of the measurements, one per measured quantity.
## They only exist to supply more points to the curve fit.
g_a1_bio, g_a1_gly, g_a1_eth = (
    Akima1DInterpolator(times, values)
    for times, values in ((t_bio, a1_bio), (t_gly, a1_gly), (t_eth, a1_eth))
)

## The interpolants evaluated on the dense grid.
fit_a1_bio, fit_a1_gly, fit_a1_eth = (
    interpolant(t) for interpolant in (g_a1_bio, g_a1_gly, g_a1_eth)
)

## Starting values for the 9 parameters of the coupled ODEs.
## Best guess obtained by experimenting with the parameters by hand.
guess1 = [0.4575, 0.5, 0.36, 0.45, 6.3599, 1.543, 19.694, 0.02, 0.23]

我怀疑问题就隐藏在下面这段代码中，但我不确定。

## Pack the interpolated measurements into the objects ODR works with.
# Each time stamp appears three times in a row: t0, t0, t0, t1, t1, t1, ...
data_time = np.repeat(t, 3)
# One row per measured quantity: biomass, glucose, ethanol.
data_experiments = np.vstack((fit_a1_bio, fit_a1_gly, fit_a1_eth))
# NOTE(review): ravel() flattens row by row, so the responses are ordered
# "all biomass, all glucose, all ethanol", which does not line up with the
# t0, t0, t0, t1, ... ordering of data_time.  In addition, fukuda_solution
# returns three values for every entry of the time array it is given, i.e.
# 9 * len(t) numbers here against 3 * len(t) observations -- this looks like
# the source of the wrong-shape error reported by ODR.
data = Data(x=data_time, y=data_experiments.ravel())

model = Model(fcn=fukuda_solution)

此处代码失败：

# Set up the fit with guess1 as the initial parameter estimates.
odr = ODR(data, model, guess1)
# The first positional argument of set_job is fit_type; per the scipy.odr
# documentation fit_type=2 selects ordinary least squares.
odr.set_job(2)
out = odr.run()
out.pprint()
# print(...) with a single argument works on both Python 2 and Python 3;
# the bare print statement is a SyntaxError on Python 3.
print(out.beta)
print(out.sd_beta)

由于附加注释，我的代码在这里出现在多个块中，但它实际上是一个文件。

Answer 1

我现在正在回答我自己的问题。问题是我在解决错误的问题。由于我有三条曲线，这不是曲线拟合问题。这是基本参数识别（或参数估计）问题。

这是解决类似问题的方法：

1. 实现自定义的适应度（目标）函数，用来衡量模型预测与数据的拟合程度。我的做法是对所有浓度的均方误差（mean squared error）求和：拟合得好时函数返回较小的值，拟合得差时返回较大的值。
2. 使用全局优化器，通过最小化目标函数来寻找最佳参数集。我使用的是 scipy.optimize 中的 basinhopping。

下面是一个最小的可运行示例/解决方案。第一个文件 'ethanol.py' 是主程序；第二个文件 'ethanol_model.py' 被第一个文件导入，包含模型、目标函数和三个辅助函数。Fukuda 是所用模型的名称。

**ethanol.py**

import numpy as np
import matplotlib.pyplot as plt

from ethanol_model import fukuda_solution
from ethanol_model import fukuda_fit

from scipy.optimize import basinhopping
from copy import deepcopy

## Measurements data
# Biomass data
data_X = np.array([0.85, 2.24, 6.11, 9.41, 10.36, 11.32, 11.6, 11.5, 11.92], dtype=float)
# Substrate data with some missing points (np.nan marks a missing sample)
data_S = np.array([84.48, 56.75, np.nan, 9.38, 5.51, 2.21, np.nan, np.nan, 1.0e-04], dtype=float)
# Product data
data_E = np.array([3.57, 6.86, 20.49, 25.23, 19.83, 14.84, 9.56, 5.37, 3.57], dtype=float)
## time points when measurements were taken
data_t = np.array([0., 2.5, 5., 8., 11., 14., 18., 22., 24.], dtype=float)

# This is a good initial guess, but it works with bad guess also
guess = [0.4575, 0.5, 0.36, 0.45, 6.3599, 1.543, 19.694, 0.02, 0.23]
parameters = deepcopy(guess)

# Global search: basinhopping runs a local BFGS minimisation of fukuda_fit
# from each trial point; "args" carries the measurement arrays to it.
minimizer_kwargs = {"method": "BFGS", "args": (data_X, data_S, data_E, data_t)}
ret = basinhopping(fukuda_fit, parameters, minimizer_kwargs=minimizer_kwargs, niter=200)
print(ret)
# Keep the optimised parameter vector.  The original assigned it to a
# misspelled name ("paramaters"), so everything below silently kept using
# the initial guess instead of the fitted values.
parameters = ret.x

retfun = fukuda_fit(parameters, data_X, data_S, data_E, data_t)
# print(...) with a single argument works on both Python 2 and Python 3.
print("Fitness2: {0}".format(retfun))

## The rest is only for visual plotting
t = np.arange(0, 24., 0.01)

# Simulate on a dense time grid, starting from the first measured values.
results = fukuda_solution(parameters, t, start_X=data_X[0], start_S = data_S[0], start_E=data_E[0])
biomass, glycose, ethanol = results
plt.plot(t, biomass, 'g-', label='Biomass', linewidth=1)
plt.plot(t, glycose, 'y-', label='Substrate', linewidth=1)
plt.plot(t, ethanol, 'b-', label='Ethanol', linewidth=1)
plt.plot(data_t, data_X, 'go', label='Biomass data')
plt.plot(data_t, data_S, 'ys', label='Substrate data')
plt.plot(data_t, data_E, 'b^', label='Ethanol data')
plt.legend(loc='best')
plt.show()
print("-------------------------")

**ethanol_model.py**

from scipy import integrate
import numpy as np

def fukuda_solution(beta, t, **kwargs):
    """Integrate the Fukuda model and return the three concentration curves.

    Parameters
    ----------
    beta : sequence of 9 numbers
        (miu_max, Scr, Yxs, Yes, Yex, Ks, Ki, Aep, Aec).
    t : sequence of numbers
        Time points handed to ``integrate.odeint``.
    **kwargs
        Must contain ``start_X``, ``start_S`` and ``start_E``: the initial
        biomass, substrate and ethanol concentrations.  A missing key
        raises ``KeyError``.

    Returns
    -------
    numpy.ndarray
        The transposed ``odeint`` result: row 0 is biomass, row 1 substrate,
        row 2 ethanol, with one column per entry of ``t``.
    """
    floor = 1e-08  # concentrations at or below this are treated as exhausted

    def rhs(state, _time, miu_max, Scr, Yxs, Yes, Yex, Ks, Ki, Aep, Aec):
        """Time derivatives (dX, dS, dE) for the current state."""
        biomass, substrate, ethanol = state[0], state[1], state[2]

        miu = miu_max * (substrate/(Ks+substrate)) * (Ki/(Ki+ethanol))

        substrate_rich = substrate >= Scr
        # Ethanol production happens only at or above the critical substrate level.
        if substrate_rich:
            Rep = Aep * (substrate - Scr)
        else:
            Rep = 0.0
        # Ethanol consumption happens only below it, while ethanol remains.
        if substrate_rich or ethanol <= 0:
            Rec = 0.0
        else:
            Rec = Aec * (Scr - substrate)

        # Each rate is forced to zero once its own concentration is (nearly)
        # gone, which artificially keeps the concentrations positive.
        if substrate > floor:
            dS = -((miu/Yxs) + (Rep/Yes)) * biomass
        else:
            dS = 0
        if biomass > floor:
            dX = (miu + (Rec/Yex)) * biomass
        else:
            dX = 0
        if ethanol > floor:
            dE = (Rep-Rec) * biomass
        else:
            dE = 0

        return np.array([dX, dS, dE], dtype=float)

    # Unpacking first means a wrong-length beta fails before anything else.
    miu_max, Scr, Yxs, Yes, Yex, Ks, Ki, Aep, Aec = beta
    initial_state = np.array(
        [kwargs['start_X'], kwargs['start_S'], kwargs['start_E']],
        dtype=float)
    rate_args = (miu_max, Scr, Yxs, Yes, Yex, Ks, Ki, Aep, Aec)
    trajectory = integrate.odeint(rhs, initial_state, t, args=rate_args)
    return trajectory.T


def gmax(*args):
    """Return the largest element found across all the given sequences."""
    return max(max(seq) for seq in args)

def gmin(*args):
    """Return the smallest element found across all the given sequences."""
    return min(min(seq) for seq in args)

def resc(a,r): # a is array, r is related array
    """Min-max rescale ``a`` using the joint range of ``a`` and ``r``.

    Every element ``v`` of ``a`` is mapped to ``(v - lo) / (hi - lo)``, where
    ``lo`` and ``hi`` are the smallest and largest values found in ``a`` and
    ``r`` together.  Rescaling two arrays against each other this way
    (``resc(a, r)`` and ``resc(r, a)``) puts both on the same 0..1 scale.

    Parameters
    ----------
    a : sequence of numbers
        Values to rescale.
    r : sequence of numbers
        Related values that only contribute to the range.

    Returns
    -------
    numpy.ndarray
        The rescaled values of ``a``, in the same order.

    Notes
    -----
    No guard is applied when all values are equal (``hi == lo``); the
    division by zero then behaves as it does for the element type.
    """
    # The bounds do not depend on the element being rescaled, so compute
    # them once instead of rescanning both arrays for every element.
    lo = min(min(a), min(r))
    hi = max(max(a), max(r))
    span = hi - lo
    return np.hstack([(value - lo) / span for value in a])



def fukuda_fit(beta, data_X, data_S, data_E, data_t):
    """Objective function for the Fukuda model (smaller means a better fit).

    The model is simulated at the measurement times, starting from the first
    entry of each measurement array.  For each of the three quantities the
    samples whose measurement is NaN are dropped, prediction and measurement
    are rescaled against each other with ``resc``, and the squared
    differences are summed.  The grand total is divided by the number of
    measurements that were kept.

    Parameters
    ----------
    beta : sequence of 9 numbers
        Model parameters, passed straight to ``fukuda_solution``.
    data_X, data_S, data_E : array-like
        Measured biomass, substrate and ethanol; NaN marks a missing sample.
    data_t : array-like
        Times at which the measurements were taken.

    Returns
    -------
    float
        Summed squared rescaled residuals divided by the number of
        non-missing measurements.
    """
    simulated = fukuda_solution(
        beta, data_t,
        start_X=data_X[0], start_S=data_S[0], start_E=data_E[0])

    squared_error = 0.0
    kept_points = 0
    # Rows of `simulated` come in the same order as the measurement arrays:
    # biomass, substrate, ethanol.
    for predicted, measured in zip(simulated, (data_X, data_S, data_E)):
        # Drop the samples that have no measurement, on both sides.
        present = ~np.isnan(measured)
        measured = np.compress(present, measured)
        predicted = np.compress(present, predicted)

        difference = resc(predicted, measured) - resc(measured, predicted)
        squared_error = squared_error + np.sum(np.power(difference, 2))
        kept_points += len(measured)

    return 1./kept_points * squared_error

是的，在解决这些类型的问题时，插值是完全没有必要的。

曲线拟合三个耦合的ODE

1 个答案: