我使用Python来适应我的数据相对较新,所以请原谅我缺乏编程技巧。但是,我无法找到当前曲线拟合尝试所引发的错误的解决方案。我相信这些错误是由于我的模型函数对两个可变参数之一(即Kd)的复杂依赖性。我希望了解具体导致此问题的原因,以及如何调整我的定义或拟合包以避免它。最简单的工作示例:
# Import libraries
import scipy as scipy
from scipy import stats
import numpy as np
from scipy.optimize import curve_fit
np.set_printoptions(precision=4)
ConcSyringeTotal = 9.5 ## total monomer concentration in the syringe [M]tot, in mM
Vinj = 10 ## volume injected in each injection, in uL
Vinit = 1250 ## volume of solvent initially in the sample cell, in uL
Vcell = 1000 ## cell volume, in uL (only the heat change within this volume is measured)
Injections = np.arange(2.00,26.00,1.00)
print Injections
Energy = np.array([136.953, 105.119, 84.414, 69.373, 60.898, 52.813, 46.187, 39.653, 33.894, 29.975, 27.315, 24.200, 21.643, 19.080, 16.158, 13.454, 13.218, 11.568, 10.742, 9.547, 8.693, 7.334, 6.111, 4.741])
print Energy
def DimerDissociation(injection, Kd, DHd): ## a dimer dissociation model for an ITC dilution experiment
## returns the heat flow (y-data, in ucal) per injection (x-data, unitless)
## fit for the dissociation constant (Kd, in mM = mmol/L, umol/mL, nmol/uL)
## and the enthalpy of dissociation (DHd, in ucal/nmol = kcal/mol)
##
## concentration (in mM) of the free monomer in the cell after equilibration of the i-th injection
VolumeAdded = 6+(injection-1)*Vinj ## in uL
VolumeTotal = Vinit + VolumeAdded ## in uL
CellTotal = ConcSyringeTotal*VolumeAdded ## Total in the cell after the i-th injection, in nmol
ConcCellTotal = CellTotal/VolumeTotal ## Total concentration in the cell after the i-th injection, in mM
ConcCellMonomer_roots = np.roots([1, Kd/2, -Kd*ConcCellTotal/2])
ConcCellMonomer_real = ConcCellMonomer_roots.real[abs(ConcCellMonomer_roots.imag)<1e-5]
ConcCellMonomer_positive = ConcCellMonomer_real[ConcCellMonomer_real>0]
ConcCellMonomer = ConcCellMonomer_positive[ConcCellMonomer_positive<ConcCellTotal]
##
## concentration (in mM) of the free monomer in the syringe
ConcSyringeMonomer_roots = np.roots([1, Kd/2, -Kd*ConcSyringeTotal/2])
ConcSyringeMonomer_real = ConcSyringeMonomer_roots.real[abs(ConcSyringeMonomer_roots.imag)<1e-5]
ConcSyringeMonomer_positive = ConcSyringeMonomer_real[ConcSyringeMonomer_real>0]
ConcSyringeMonomer = ConcSyringeMonomer_positive[ConcSyringeMonomer_positive<ConcSyringeTotal]
## nmol of the free monomer injected from the syringe
SyringeMonomerInjected = Vinj*ConcSyringeMonomer[0]
##
## concentration (in mM) of the free monomer in the cell before the i-th injection
VolumeAddedPre = 6+(injection-2)*Vinj
VolumeTotalPre = Vinit + VolumeAddedPre
CellTotalPre = ConcSyringeTotal*VolumeAddedPre
ConcCellTotalPre = CellTotalPre/VolumeTotalPre
ConcCellMonomerPre_roots = np.roots([1, Kd/2, -Kd*ConcCellTotalPre/2])
ConcCellMonomerPre_real = ConcCellMonomerPre_roots.real[abs(ConcCellMonomerPre_roots.imag)<1e-5]
ConcCellMonomerPre_positive = ConcCellMonomerPre_real[ConcCellMonomerPre_real>0]
ConcCellMonomerPre = ConcCellMonomerPre_positive[ConcCellMonomerPre_positive<ConcCellTotalPre]
## nmol of the free monomer in the cell before the i-th injection
CellMonomerPre = VolumeTotalPre*ConcCellMonomerPre[0]
##
## concentration of the free monomer before equilibration of the i-th injection, in mM
ConcCellMonomerBefore = (CellMonomerPre+SyringeMonomerInjected)/VolumeAdded
## concentration of the free monomer after equilibration of the i-th injection, in mM
ConcCellMonomerAfter = ConcCellMonomer[0]
## change in concentration of the free monomer over the equilibration of the i-th injection, in mM
ConcCellMonomerChange = ConcCellMonomerAfter - ConcCellMonomerBefore
##
return Vcell*DHd*ConcCellMonomerChange
DimerDissociation_opt, DimerDissociation_cov = curve_fit(DimerDissociation, Injections, Energy, p0=[0.4,10])
DimerDissociation_stdev = np.sqrt(np.diag(DimerDissociation_cov))
print "optimized parameters:", DimerDissociation_opt
print "covariance matrix:", DimerDissociation_cov
print "standard deviation of fit parameters:", DimerDissociation_stdev
以及相关的错误:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-38-b5ef2361feed> in <module>()
52 ##
53 return Vcell*DHd*ConcCellMonomerChange
---> 54 DimerDissociation_opt, DimerDissociation_cov = curve_fit(DimerDissociation, Injections, Energy, p0=[0.4,10])
55 DimerDissociation_stdev = np.sqrt(np.diag(DimerDissociation_cov))
56 print "optimized parameters:", DimerDissociation_opt
//anaconda/lib/python2.7/site-packages/scipy/optimize/minpack.pyc in curve_fit(f, xdata, ydata, p0, sigma, absolute_sigma, **kw)
553 # Remove full_output from kw, otherwise we're passing it in twice.
554 return_full = kw.pop('full_output', False)
--> 555 res = leastsq(func, p0, args=args, full_output=1, **kw)
556 (popt, pcov, infodict, errmsg, ier) = res
557
//anaconda/lib/python2.7/site-packages/scipy/optimize/minpack.pyc in leastsq(func, x0, args, Dfun, full_output, col_deriv, ftol, xtol, gtol, maxfev, epsfcn, factor, diag)
367 if not isinstance(args, tuple):
368 args = (args,)
--> 369 shape, dtype = _check_func('leastsq', 'func', func, x0, args, n)
370 m = shape[0]
371 if n > m:
//anaconda/lib/python2.7/site-packages/scipy/optimize/minpack.pyc in _check_func(checker, argname, thefunc, x0, args, numinputs, output_shape)
18 def _check_func(checker, argname, thefunc, x0, args, numinputs,
19 output_shape=None):
---> 20 res = atleast_1d(thefunc(*((x0[:numinputs],) + args)))
21 if (output_shape is not None) and (shape(res) != output_shape):
22 if (output_shape[0] != 1):
//anaconda/lib/python2.7/site-packages/scipy/optimize/minpack.pyc in _general_function(params, xdata, ydata, function)
443
444 def _general_function(params, xdata, ydata, function):
--> 445 return function(xdata, *params) - ydata
446
447
<ipython-input-38-b5ef2361feed> in DimerDissociation(injection, Kd, DHd)
19 CellTotal = ConcSyringeTotal*VolumeAdded ## Total in the cell after the i-th injection, in nmol
20 ConcCellTotal = CellTotal/VolumeTotal ## Total concentration in the cell after the i-th injection, in mM
---> 21 ConcCellMonomer_roots = np.roots([1, Kd/2, -Kd*ConcCellTotal/2])
22 ConcCellMonomer_real = ConcCellMonomer_roots.real[abs(ConcCellMonomer_roots.imag)<1e-5]
23 ConcCellMonomer_positive = ConcCellMonomer_real[ConcCellMonomer_real>0]
//anaconda/lib/python2.7/site-packages/numpy/lib/polynomial.pyc in roots(p)
199 """
200 # If input is scalar, this makes it an array
--> 201 p = atleast_1d(p)
202 if len(p.shape) != 1:
203 raise ValueError("Input must be a rank-1 array.")
//anaconda/lib/python2.7/site-packages/numpy/core/shape_base.pyc in atleast_1d(*arys)
47 res = []
48 for ary in arys:
---> 49 ary = asanyarray(ary)
50 if len(ary.shape) == 0 :
51 result = ary.reshape(1)
//anaconda/lib/python2.7/site-packages/numpy/core/numeric.pyc in asanyarray(a, dtype, order)
512
513 """
--> 514 return array(a, dtype, copy=False, order=order, subok=True)
515
516 def ascontiguousarray(a, dtype=None):
ValueError: setting an array element with a sequence.
答案 0 :(得分:2)
问题是numpy.curve_fit
将xdata作为数组传递给目标函数。这意味着injection
中DimerDissociation
上的所有操作实际上都是数组操作。因此,ConcCellTotal
也是一个数组(通过在代码中的第27行插入print type(ConcCellTotal)
来检查)。这意味着您对np.roots
的调用看起来像np.roots([scalar, scalar, array])
,这是错误的来源。
当我处理这些事情时,我总是被扭转,但我认为的想法是优化器的目标函数应该完全矢量化;每次调用时,都需要返回一个数值,每个注入值都有一个能量值。
我通过明确地将ConcCellMonomer_roots
作为一个数组修复了上面的错误,并且我还提出了一些关于变量状态的天真报告:
def DimerDissociation(injection, Kd, DHd):
print 'Called DimerDissociation'
VolumeAdded = 6.0+(injection-1.0)*Vinj ## in uL
VolumeTotal = Vinit + VolumeAdded ## in uL
CellTotal = ConcSyringeTotal*VolumeAdded ## Total in the cell after the i-th injection, in nmol
ConcCellTotal = CellTotal/VolumeTotal ## Total concentration in the cell after the i-th injection, in mM
print 'total\t',np.shape(ConcCellTotal)
ConcCellMonomer_roots = np.asarray([np.roots([1.0, Kd/2.0, -Kd*i/2.0]) for i in ConcCellTotal])
print 'roots\t',np.shape(ConcCellMonomer_roots)
ConcCellMonomer_real = ConcCellMonomer_roots.real[abs(ConcCellMonomer_roots.imag)<1e-5]
print 'real\t',np.shape(ConcCellMonomer_real)
ConcCellMonomer_positive = ConcCellMonomer_real[ConcCellMonomer_real>0]
print 'positive\t',np.shape(ConcCellMonomer_positive)
ConcCellMonomer = ConcCellMonomer_positive[ConcCellMonomer_positive<ConcCellTotal]
print 'monomer\t',np.shape(ConcCellMonomer)
我还使用ConcCellMonomerPre_roots
对np.asarray
进行了相应的更正。通过这些编辑,我让优化器迭代几次,直到ConcCellMonomer_roots
包含一些虚数值。一旦发生这种情况,ConCellMonomer_real
不再与ConcCellTotal
形状相同,因此行ConcCellMonomer_positive[ConcCellMonomer_positive<ConcCellTotal]
会引发广播错误。对DimerDissociation
的调用给出了这个输出:
Called DimerDissociation
total (24,)
roots (24, 2)
real (48,)
positive(24,)
monomer (24,)
直到最后一次迭代:
Called DimerDissociation
total (24,)
roots (24, 2)
real (4,)
positive(4,)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "C:\Anaconda\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 540, in runfile
execfile(filename, namespace)
File "C:/Users/Devin/Documents/Python Scripts/SO.py", line 66, in <module>
DimerDissociation_opt, DimerDissociation_cov = curve_fit(DimerDissociation, Injections, Energy, p0=[0.4,10])
File "C:\Anaconda\lib\site-packages\scipy\optimize\minpack.py", line 533, in curve_fit
res = leastsq(func, p0, args=args, full_output=1, **kw)
File "C:\Anaconda\lib\site-packages\scipy\optimize\minpack.py", line 378, in leastsq
gtol, maxfev, epsfcn, factor, diag)
File "C:\Anaconda\lib\site-packages\scipy\optimize\minpack.py", line 444, in _general_function
return function(xdata, *params) - ydata
File "C:/Users/Devin/Documents/Python Scripts/SO.py", line 35, in DimerDissociation
ConcCellMonomer = ConcCellMonomer_positive[ConcCellMonomer_positive<ConcCellTotal]
ValueError: operands could not be broadcast together with shapes (4) (24)
希望这会让你走上正轨,虽然我不是这里的专家,但其他人可能会有更好的想法。
答案 1 :(得分:0)
我无法重现您的错误。我注意到的第一个问题是你使用np.roots
。 roots(p)
返回由p
中的系数指定的多项式的根,特别是p[0] + p[1] * x + p[2] * x**2 + ...
。你的第三个系数-Kd*ConcCellTotal/2
是injections
的函数,它是一个数组。 np.roots
没有文档签名,允许将数组作为p
的成员之一传递。
你可以编辑和澄清吗?
-Ravi
P.S。一个展示curve_fit
如何工作的玩具示例:
import numpy as np
from scipy.optimize import curve_fit
x_in = np.array([-3.0,-2.0,-1.0,0.0,1.0,2.0,3.0])
def f(x,a,b):
return a*x+b
y_in = f(x_in,3,2)
parameters_fit,cov = curve_fit(f,x_in,y_in)
y_out = parameters_fit[0]*x_in+parameters_fit[1]
print parameters_fit
print y_in
print y_out
y_in = f(x_in,10,75)
parameters_fit,cov = curve_fit(f,x_in,y_in)
y_out = parameters_fit[0]*x_in+parameters_fit[1]
print parameters_fit
print y_in
print y_out
目标函数 f
将x值和一个或多个参数作为参数。 curve_fit
将目标函数,x值 x_in 的数组和y值 y_in 的数组作为参数作为参数。然后它为参数 a 和 b 组成一些值,并在 x_in 上评估目标函数,它给出一个数组 y_out < / strong>即可。它计算 y_in 和 y_out 之间的RMS误差,然后调整 a 和 b 的值,直到RMS出错最小化。
魔鬼真正详细说明了如何选择 a 和 b 的初始值(如果他们没有提供,就像OP那样)和他们如何调整。这对我们scipy.optimize
用户来说非常复杂,但并非绝对必要。