Python 拟合任意数量的高斯函数——拟合效果很差?

时间:2019-11-15 10:21:16

标签: python curve-fitting scipy-optimize

我正在尝试概括一些代码,以便能够在单个数据集中拟合多个(n从1到> 10)高斯曲线/峰。

使用 scipy.optimize.curve_fit,当我为 1–3 个高斯峰硬编码函数时,可以得到很好的拟合;对于任意数量高斯峰的通用情况,我也设法写出了运行时不报错的函数。但是,尽管给出的初始参数与用于生成“原始”数据的参数完全相同(即最理想的情况),输出的拟合结果仍然非常差。

此外,将来很可能需要把具体的峰形函数从简单的高斯函数改成其他形式,但目前使用高斯函数应该就够了。

下面是我的代码示例,输出图如下所示。

import numpy as np
import pandas as pd
import scipy 
import scipy.optimize
import matplotlib.pyplot as plt
from matplotlib import gridspec

# Parameters of a single reference peak (amplitude, center, sigma).
# They are not referenced by any of the code visible below.
amp1, cen1, sigma1 = 1, 1, 0.05

# Working table whose index is the x-axis: 1000 evenly spaced points on
# [0, 10].  The 'int' column is created empty and later columns are added
# next to it.
df = pd.DataFrame(
    columns=['int'],
    index=np.linspace(0, 10, num=1000),
)

def _ngaussian(x, amps, cens, sigmas):
    """Evaluate a sum of Gaussian peaks at ``x``.

    Peak ``k`` contributes::

        amps[k] * 1 / (sigmas[k] * sqrt(2*pi))
                * exp(-0.5 * ((x - cens[k]) / sigmas[k]) ** 2)

    Args:
        x: Point(s) at which to evaluate the curve; must support
            element-wise arithmetic with scalars.
        amps: Sequence of peak amplitudes.
        cens: Sequence of peak centers.
        sigmas: Sequence of peak sigmas.

    Returns:
        The summed curve.  When the three sequences differ in length a
        message is printed and the integer ``0`` is returned; ``0`` is also
        returned when all three sequences are empty.
    """
    if not (len(amps) == len(cens) == len(sigmas)):
        print('Your inputs have unequal lengths')
        return 0

    total = 0
    for amp, cen, sigma in zip(amps, cens, sigmas):
        norm = 1 / (sigma * (np.sqrt(2 * np.pi)))
        bell = np.exp((-1.0 / 2.0) * (((x - cen) / sigma) ** 2))
        total = total + amp * norm * bell
    return total



# Ground-truth parameters of the three synthetic peaks; position k in each
# list belongs to peak k.
amps = [1, 1.1, 0.9]
cens = [1, 2, 1.7]
sigmas = [0.05, 0.05, 0.05]

# Bundle the lists as [amps, cens, sigmas] and evaluate the ideal,
# noise-free curve on the DataFrame's x grid.
popt_peaks = [amps, cens, sigmas]
df['peaks'] = _ngaussian(df.index, amps, cens, sigmas)

# Optional (disabled): perturb the ideal curve with Gaussian noise.
# noise = np.random.normal(0, 0.1, len(df['peaks']))
# df['peaks'] = df['peaks'] + noise

def wrapper_fit_func(x, *args):
    """Evaluate the multi-Gaussian model from one packed parameter sequence.

    Args:
        x: Point(s) at which to evaluate the model.
        *args: Only ``args[0]`` is read.  It must be a flat sequence laid out
            as ``[amp_1..amp_N, cen_1..cen_N, sigma_1..sigma_N]``.

    Returns:
        Whatever ``_ngaussian`` returns for the unpacked amplitudes, centers
        and sigmas.
    """
    packed = args[0]
    # The peak count must come from the packed sequence itself.  ``len(args)``
    # is 1 whenever the parameters arrive as a single sequence, so using it
    # made the slices below pick just the first three packed values (three
    # amplitudes) and treat them as the amp, center and sigma of one peak.
    N = len(packed) // 3
    a = list(packed[:N])
    b = list(packed[N:2 * N])
    c = list(packed[2 * N:3 * N])
    return _ngaussian(x, a, b, c)

def unwrapper_fit_func(x, *args):
    """Evaluate the multi-Gaussian model from parameters passed positionally.

    Args:
        x: Point(s) at which to evaluate the model.
        *args: Flat parameters in the order
            ``amp_1..amp_N, cen_1..cen_N, sigma_1..sigma_N``; the peak count
            ``N`` is one third of the number of values, rounded down.

    Returns:
        Whatever ``_ngaussian`` returns for the three parameter groups.
    """
    n_peaks = len(args) // 3
    first_cut, second_cut, third_cut = n_peaks, 2 * n_peaks, 3 * n_peaks
    amp_part = list(args[:first_cut])
    cen_part = list(args[first_cut:second_cut])
    sigma_part = list(args[second_cut:third_cut])
    return _ngaussian(x, amp_part, cen_part, sigma_part)

# Fit the packed multi-Gaussian model to the ideal curve, starting from the
# very parameters that generated it.  curve_fit hands the parameters to the
# callable positionally; the lambda collects them into one tuple for
# wrapper_fit_func.
# NOTE(review): p0 is the nested [amps, cens, sigmas] list; presumably SciPy
# flattens it row by row into amps, cens, sigmas order -- confirm.
popt_fitpeaks, pcov_fitpeaks = scipy.optimize.curve_fit(
    lambda x, *flat_params: wrapper_fit_func(x, flat_params),
    df.index,
    df['peaks'],
    p0=popt_peaks,
    method='lm',
)

# Re-evaluate the model at the fitted parameters.
df['peaks_fit'] = unwrapper_fit_func(df.index, *popt_fitpeaks)

# Overlay the ideal and the fitted curve on a single axes limited to x in [0, 3].
fig = plt.figure(figsize=(8, 8))
gs = gridspec.GridSpec(1, 1)
ax1 = fig.add_subplot(gs[0])
ax1.set_xlim(0, 3)
ax1.plot(df.index, df['peaks'], "b", label='ideal data')
ax1.plot(df.index, df['peaks_fit'], "g", label='fit data')
ax1.legend(loc='upper right')

[图:三个高斯峰的曲线拟合效果很差]

如果您感兴趣的话,这些代码用于分析化学中核磁共振(NMR)和傅立叶变换离子回旋共振质谱(FTICR MS)的信号处理。

1 个答案:

答案 0 :(得分:1)

您可能会发现 lmfit(https://lmfit.github.io/lmfit-py/,披露:我是第一作者)对此很有帮助。它提供了一个易于使用的 Model 类,用于对数据进行建模,包括用于高斯、Voigt 和类似线形的内置 Model,便于比较不同的模型函数。

Lmfit 的模型可以相加(或以其他方式组合)来创建复合模型,从而轻松支持 1、2、3 个乃至更多的高斯峰,还可以加入不同的基线函数。上面的链接中有文档和一些示例。对您的示例稍作改写(包括添加一些噪声)后,大致如下:

(使用 lmfit 的完整示例代码见本答案末尾。)

这将产生如下所示的拟合图:[图片]

并打印一份拟合报告(报告内容此处未包含)。示例代码如下:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from lmfit.models import GaussianModel

# Parameters of a single reference peak (amplitude, center, sigma).
# They are not referenced by any of the code visible below.
amp1, cen1, sigma1 = 1, 1, 0.05

# Working table whose index is the x-axis: 1000 evenly spaced points on
# [0, 10].  The 'int' column is created empty and later columns are added
# next to it.
df = pd.DataFrame(
    columns=['int'],
    index=np.linspace(0, 10, num=1000),
)

def _ngaussian(x, amps, cens, sigmas):
    """Build a noisy sum of Gaussian peaks evaluated at ``x``.

    Peak ``k`` contributes::

        amps[k] * 1 / (sigmas[k] * sqrt(2*pi))
                * exp(-0.5 * ((x - cens[k]) / sigmas[k]) ** 2)

    After each peak is added, an independent draw of normal noise
    (``scale=0.05``, one value per element of ``x``) is added as well, so the
    result is random and the total noise grows with the number of peaks.

    Args:
        x: Points at which to evaluate the curve; must have a length and
            support element-wise arithmetic with scalars.
        amps: Sequence of peak amplitudes.
        cens: Sequence of peak centers.
        sigmas: Sequence of peak sigmas.

    Returns:
        The noisy summed curve.  When the three sequences differ in length a
        message is printed and the integer ``0`` is returned; ``0`` is also
        returned when all three sequences are empty.
    """
    if not (len(amps) == len(cens) == len(sigmas)):
        print('Your inputs have unequal lengths')
        return 0

    total = 0
    for amp, cen, sigma in zip(amps, cens, sigmas):
        norm = 1 / (sigma * (np.sqrt(2 * np.pi)))
        bell = np.exp((-1.0 / 2.0) * (((x - cen) / sigma) ** 2))
        total = total + amp * norm * bell
        # NOTE(review): noise is drawn once per peak, not once per curve --
        # confirm this is intended.
        total = total + np.random.normal(size=len(x), scale=0.05)
    return total

# True parameters of the three synthetic peaks; position k in each list
# belongs to peak k.
amps = [1.30, 0.92, 2.11]
cens = [1.10, 1.73, 2.06]
sigmas = [0.05, 0.09, 0.07]

# Generate the (noisy) data column on the DataFrame's x grid.
popt_peaks = [amps, cens, sigmas]
df['peaks'] = _ngaussian(df.index, amps, cens, sigmas)

# Composite model: one GaussianModel per peak, told apart by the parameter
# prefixes p1_, p2_, p3_.  Adding further peaks is a matter of adding more
# prefixed components the same way.
model = GaussianModel(prefix='p1_')
model = model + GaussianModel(prefix='p2_')
model = model + GaussianModel(prefix='p3_')

# Starting values.  Parameter names are the prefix followed by the model
# function's argument name: "center", "amplitude" or "sigma".
params = model.make_params(
    p1_amplitude=2, p1_center=1.0, p1_sigma=0.1,
    p2_amplitude=2, p2_center=1.5, p2_sigma=0.1,
    p3_amplitude=2, p3_center=2.0, p3_sigma=0.1,
)

# Restrict each center to its own window.  Parameters can also be held
# fixed (`.vary = False`) or tied to an expression of other parameters.
params['p1_center'].set(min=0.8, max=1.5)
params['p2_center'].set(min=1.1, max=1.9)
params['p3_center'].set(min=1.88, max=3.00)

# Run the fit with the DataFrame index as the independent variable.
result = model.fit(df['peaks'], params, x=df.index)

# Text summary of the fit.
print(result.fit_report())

# Data as markers, best fit as a line, x-axis limited to [0, 3].
plt.plot(df.index, df['peaks'], 'o', label='data')
plt.plot(df.index, result.best_fit, '-', label='fit')
plt.legend()
plt.xlim(0, 3)
plt.show()