Question

我试图将经验CDF图拟合到两个高斯cdf，因为它似乎有两个峰值，但它不起作用。我使用 scipy.optimize 中的 leastsq 和 scipy.special 中的 erf 函数拟合曲线。拟合只给出值为2的常量线。我不确定代码的哪一部分我犯了错误。任何指针都会有所帮助。谢谢！

%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

x = np.array([ 90.64115156,  90.85690063,  91.07264971,  91.28839878,
        91.50414786,  91.71989693,  91.93564601,  92.15139508,
        92.36714415,  92.58289323,  92.7986423 ,  93.01439138,
        93.23014045,  93.44588953,  93.6616386 ,  93.87738768,
        94.09313675,  94.30888582,  94.5246349 ,  94.74038397,
        94.95613305,  95.17188212,  95.3876312 ,  95.60338027,
        95.81912935,  96.03487842,  96.2506275 ,  96.46637657,
        96.68212564,  96.89787472,  97.11362379,  97.32937287,
        97.54512194,  97.76087102,  97.97662009,  98.19236917,
        98.40811824,  98.62386731,  98.83961639,  99.05536546,
        99.27111454,  99.48686361,  99.70261269,  99.91836176,
       100.13411084, 100.34985991, 100.56560899, 100.78135806,
       100.99710713, 101.21285621])
y = np.array([3.33333333e-04, 3.33333333e-04, 3.33333333e-04, 1.00000000e-03,
       1.33333333e-03, 3.33333333e-03, 6.66666667e-03, 1.30000000e-02,
       2.36666667e-02, 3.40000000e-02, 5.13333333e-02, 7.36666667e-02,
       1.01666667e-01, 1.38666667e-01, 2.14000000e-01, 3.31000000e-01,
       4.49666667e-01, 5.50000000e-01, 6.09000000e-01, 6.36000000e-01,
       6.47000000e-01, 6.54666667e-01, 6.61000000e-01, 6.67000000e-01,
       6.76333333e-01, 6.84000000e-01, 6.95666667e-01, 7.10000000e-01,
       7.27666667e-01, 7.50666667e-01, 7.75333333e-01, 7.93333333e-01,
       8.11333333e-01, 8.31333333e-01, 8.56333333e-01, 8.81333333e-01,
       9.00666667e-01, 9.22666667e-01, 9.37666667e-01, 9.47333333e-01,
       9.59000000e-01, 9.70333333e-01, 9.77333333e-01, 9.83333333e-01,
       9.90333333e-01, 9.93666667e-01, 9.96333333e-01, 9.99000000e-01,
       9.99666667e-01, 1.00000000e+00])
plt.plot(a,b,'r.')

# Fitting with 2 Gaussian
from scipy.special import erf
from scipy.optimize import leastsq

def two_gaussian_cdf(params, x):
    (mu1, sigma1, mu2, sigma2) = params
    model = 0.5*(1 + erf( (x-mu1)/(sigma1*np.sqrt(2)) )) +\
            0.5*(1 + erf( (x-mu2)/(sigma2*np.sqrt(2)) ))
    return model

def residual_two_gaussian_cdf(params, x, y):
    model = double_gaussian(params, x)
    return model - y

params = [5.,2.,1.,2.]
out = leastsq(residual_two_gaussian_cdf,params,args=(x,y))
double_gaussian(out[0],x)
plt.plot(x,two_gaussian_cdf(out[0],x))

返回此情节

Answer 1

您可能会发现lmfit（请参阅http://lmfit.github.io/lmfit-py/）是leastsq的有用替代品，因为它为优化和曲线拟合提供了更高级别的界面（尽管仍然基于{ {1}}）。使用lmfit，您的示例可能如下所示（删除scipy.optimize.leastsq和x数据的定义）：

这会打印出

的报告

#!/usr/bin/env python  
import numpy as np
from scipy.special import erf
import matplotlib.pyplot as plt
from lmfit import Model

# define the basic model.  I included an amplitude parameter
def gaussian_cdf(x, amp, mu, sigma):
    return (amp/2.0)*(1 + erf( (x-mu)/(sigma*np.sqrt(2))))

# create a model that is the sum of two gaussian_cdfs
# note that a prefix names each component and will be
# applied to the parameter names for each model component
model = Model(gaussian_cdf, prefix='g1_') + Model(gaussian_cdf, prefix='g2_')

# make a parameters object -- a dict with parameter names
# taken from the arguments of your model function and prefix
params = model.make_params(g1_amp=0.50, g1_mu=94, g1_sigma=1,
                           g2_amp=0.50, g2_mu=98, g2_sigma=1.)

# you can apply bounds to any parameter
#params['g1_sigma'].min = 0   # sigma must be > 0!

# you may want to fix the amplitudes to 0.5:
#params['g1_amp'].vary = False
#params['g2_amp'].vary = False

# run the fit
result = model.fit(y, params, x=x)

# print results
print(result.fit_report())

# plot results, including individual components
comps = result.eval_components(result.params, x=x)

plt.plot(x, y,'r.', label='data')
plt.plot(x, result.best_fit, 'k-', label='fit')
plt.plot(x, comps['g1_'], 'b--', label='g1_')
plt.plot(x, comps['g2_'], 'g--', label='g2_')
plt.legend()
plt.show()

和这样的情节：

这种合适并不完美，但它应该让你开始。

Answer 2

以下是我使用scipy.optimize.differential_evolution模块生成曲线拟合的初始参数估计的方法。我将平方误差之和编码为遗传算法的目标，如下所示。这个scipy模块使用Latin Hypercube算法来确保彻底搜索参数空间，这需要在其中进行搜索的参数范围。在这种情况下，参数边界会自动从数据中派生出来，因此无需在代码中手动提供它们。

import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import warnings

from scipy.optimize import differential_evolution
from scipy.special import erf


# bounds on parameters are set in generate_Initial_Parameters() below
def two_gaussian_cdf(x, mu1, sigma1, mu2, sigma2):
    model = 0.5*(1 + erf( (x-mu1)/(sigma1*np.sqrt(2)) )) +\
            0.5*(1 + erf( (x-mu2)/(sigma2*np.sqrt(2)) ))
    return model


# function for genetic algorithm to minimize (sum of squared error)
# bounds on parameters are set in generate_Initial_Parameters() below
def sumOfSquaredError(parameterTuple):
    warnings.filterwarnings("ignore") # do not print warnings by genetic algorithm
    return np.sum((yData - two_gaussian_cdf(xData, *parameterTuple)) ** 2)


def generate_Initial_Parameters():
    # data min and max used for bounds
    maxX = max(xData)
    minX = min(xData)
    maxY = max(yData)
    minY = min(yData)

    parameterBounds = []
    parameterBounds.append([minX, maxX]) # parameter bounds for mu1
    parameterBounds.append([minY, maxY]) # parameter bounds for sigma1
    parameterBounds.append([minX, maxX]) # parameter bounds for mu2
    parameterBounds.append([minY, maxY]) # parameter bounds for sigma2

    # "seed" the numpy random number generator for repeatable results
    result = differential_evolution(sumOfSquaredError, parameterBounds, seed=3)
    return result.x


xData = np.array([ 90.64115156,  90.85690063,  91.07264971,  91.28839878,
        91.50414786,  91.71989693,  91.93564601,  92.15139508,
        92.36714415,  92.58289323,  92.7986423 ,  93.01439138,
        93.23014045,  93.44588953,  93.6616386 ,  93.87738768,
        94.09313675,  94.30888582,  94.5246349 ,  94.74038397,
        94.95613305,  95.17188212,  95.3876312 ,  95.60338027,
        95.81912935,  96.03487842,  96.2506275 ,  96.46637657,
        96.68212564,  96.89787472,  97.11362379,  97.32937287,
        97.54512194,  97.76087102,  97.97662009,  98.19236917,
        98.40811824,  98.62386731,  98.83961639,  99.05536546,
        99.27111454,  99.48686361,  99.70261269,  99.91836176,
       100.13411084, 100.34985991, 100.56560899, 100.78135806,
       100.99710713, 101.21285621])
yData = np.array([3.33333333e-04, 3.33333333e-04, 3.33333333e-04, 1.00000000e-03,
       1.33333333e-03, 3.33333333e-03, 6.66666667e-03, 1.30000000e-02,
       2.36666667e-02, 3.40000000e-02, 5.13333333e-02, 7.36666667e-02,
       1.01666667e-01, 1.38666667e-01, 2.14000000e-01, 3.31000000e-01,
       4.49666667e-01, 5.50000000e-01, 6.09000000e-01, 6.36000000e-01,
       6.47000000e-01, 6.54666667e-01, 6.61000000e-01, 6.67000000e-01,
       6.76333333e-01, 6.84000000e-01, 6.95666667e-01, 7.10000000e-01,
       7.27666667e-01, 7.50666667e-01, 7.75333333e-01, 7.93333333e-01,
       8.11333333e-01, 8.31333333e-01, 8.56333333e-01, 8.81333333e-01,
       9.00666667e-01, 9.22666667e-01, 9.37666667e-01, 9.47333333e-01,
       9.59000000e-01, 9.70333333e-01, 9.77333333e-01, 9.83333333e-01,
       9.90333333e-01, 9.93666667e-01, 9.96333333e-01, 9.99000000e-01,
       9.99666667e-01, 1.00000000e+00])

# generate initial parameter values
initialParameters = generate_Initial_Parameters()

# curve fit the data
fittedParameters, niepewnosci = curve_fit(two_gaussian_cdf, xData, yData, initialParameters)

# create values for display of fitted peak function
mu1, sigma1, mu2, sigma2 = fittedParameters
y_fit = two_gaussian_cdf(xData, mu1, sigma1, mu2, sigma2)

plt.plot(xData, yData) # plot the raw data
plt.plot(xData, y_fit) # plot the equation using the fitted parameters
plt.show()

print(fittedParameters)

使用LeastSq使用2高斯拟合CDF

2 个答案: