我正在尝试将一些代码通用化,以便能够在单个数据集中拟合多个(n 从 1 到 10 以上)高斯曲线/峰。
使用 scipy.optimize.curve_fit 时,如果为 1–3 个高斯峰硬编码函数,我可以得到很好的拟合;对于任意数量的高斯峰,我也写出了运行时不会报错的通用函数。但是,即使给出的初始参数与用于生成“原始”数据的参数完全相同(即最理想的情况),输出的拟合结果仍然非常差。
此外,将来有可能需要把峰形函数从简单的高斯函数改为其他函数,不过目前使用高斯函数应该就可以了。
下面是我的代码示例,输出图如下所示。
import numpy as np
import pandas as pd
import scipy
import scipy.optimize
import matplotlib.pyplot as plt
from matplotlib import gridspec
# Single-peak values (amplitude, centre, sigma judging by the names); they are
# not referenced anywhere else in the visible code.
amp1 = 1
cen1 = 1
sigma1 = 0.05
# Frame whose index is a 1000-point x grid over [0, 10]. The 'int' column is
# declared here but never filled in the visible code.
df=pd.DataFrame(index=np.linspace(0,10,num=1000),columns=['int'])
def _ngaussian(x, amps,cens,sigmas):
fn = 0
if len(amps)== len(cens)== len(sigmas):
for i in range(len(amps)):
fn = fn+amps[i]*(1/(sigmas[i]*(np.sqrt(2*np.pi))))*\
(np.exp((-1.0/2.0)*(((x-cens[i])/sigmas[i])**2)))
else:
print('Your inputs have unequal lengths')
return fn
# Ground-truth parameters for three peaks: one entry per peak in each list.
amps = [1,1.1,0.9]
cens = [1,2,1.7]
sigmas=[0.05]*3
# Grouped as [amps, cens, sigmas] so the list can be star-unpacked into
# _ngaussian; the same nested list is reused further down as the initial
# guess for the fit.
popt_peaks = [amps,cens,sigmas]
# Synthetic "ideal" signal evaluated on the frame's x grid (its index).
df['peaks'] = _ngaussian(df.index, *popt_peaks)
# Optionally adding noise to the raw data
#noise = np.random.normal(0,0.1,len(df['peaks']))
#df['peaks'] = df['peaks']+noise
def wrapper_fit_func(x, *args):
    """Evaluate the N-Gaussian model from one packed parameter sequence.

    Parameters
    ----------
    x : array-like
        Points at which the model is evaluated.
    *args
        ``args[0]`` is the flat parameter sequence laid out as
        ``[a1..aN, c1..cN, s1..sN]`` (amplitudes, centres, sigmas).
        Any further positional arguments are ignored.

    Returns
    -------
    Whatever ``_ngaussian`` returns for the unpacked parameters.
    """
    params = args[0]
    # The peak count must come from the packed sequence itself. ``len(args)``
    # is always 1 for this calling convention, which used to collapse every
    # fit to a single Gaussian built from the first three packed values.
    N = len(params) // 3
    a, b, c = list(params[:N]), list(params[N:N*2]), list(params[2*N:3*N])
    return _ngaussian(x, a, b, c)
def unwrapper_fit_func(x, *args):
    """Evaluate the N-Gaussian model from individually passed parameters.

    The positional parameters after ``x`` are read as three consecutive
    groups of equal size: amplitudes, then centres, then sigmas. Values
    beyond ``3 * (len(args) // 3)`` are ignored.
    """
    n_peaks = len(args) // 3
    heights = list(args[:n_peaks])
    centres = list(args[n_peaks:2 * n_peaks])
    widths = list(args[2 * n_peaks:3 * n_peaks])
    return _ngaussian(x, heights, centres, widths)
# curve_fit optimises a flat 1-D parameter vector, so flatten the grouped
# [amps, cens, sigmas] guess into [a1..aN, c1..cN, s1..sN] explicitly instead
# of relying on the nested list being flattened implicitly.
p0_flat = np.concatenate(popt_peaks).astype(float)
# unwrapper_fit_func receives the parameters as individual positional
# arguments in that same order, which is how curve_fit calls the model, and
# it derives the number of peaks from how many parameters it is given.
popt_fitpeaks, pcov_fitpeaks = scipy.optimize.curve_fit(
    unwrapper_fit_func, df.index, df['peaks'], p0=p0_flat, method='lm')
# Evaluate the fitted model on the same x grid so it can be plotted.
df['peaks_fit'] = unwrapper_fit_func(df.index, *popt_fitpeaks)
# Single-panel 8x8 figure laid out through a 1x1 GridSpec.
fig = plt.figure(figsize=(8,8))
gs = gridspec.GridSpec(1,1)
ax1 = fig.add_subplot(gs[0])
# Show only x in [0, 3]; the data grid itself extends to 10.
ax1.set_xlim(0,3)
# Overlay the generated data (blue) and the fitted model (green).
ax1.plot(df.index, df['peaks'], "b",label='ideal data')
ax1.plot(df.index, df['peaks_fit'], "g",label='fit data')
ax1.legend(loc='upper right')
如果您有兴趣,可以参考分析化学,核磁共振(NMR)和傅立叶变换离子回旋共振质谱(FTICR MS)信号处理。
答案 0 :(得分:1)
您可能会发现HRESULT BindToCsidl(int csidl, REFIID riid, void **ppv) {
// Obtains the item ID list for the special folder `csidl` and hands back, in
// *ppv, the interface `riid` of that folder as reached from the desktop
// folder. Returns the HRESULT of the last call that was made.
HRESULT hr;
PIDLIST_ABSOLUTE pidl;
hr = SHGetSpecialFolderLocation(NULL, csidl, &pidl);
if (SUCCEEDED(hr)) {
IShellFolder *psfDesktop;
hr = SHGetDesktopFolder(&psfDesktop);
if (SUCCEEDED(hr)) {
// A non-zero cb in the first item ID means the list is not empty, so bind to
// the object it names relative to the desktop folder.
if (pidl->mkid.cb) {
hr = psfDesktop->BindToObject(pidl, NULL, riid, ppv);
} else {
// Empty ID list: presumably this denotes the desktop itself, so the desktop
// folder object is queried for the interface directly.
hr = psfDesktop->QueryInterface(riid, ppv);
}
psfDesktop->Release();
}
// The ID list obtained above is freed here on every path that obtained it.
CoTaskMemFree(pidl);
}
return hr;
}
void enum_dekstop() {
HRESULT hr = CoInitialize(NULL);
if (SUCCEEDED(hr)) {
IShellFolder2 *psfDesktop;
hr = BindToCsidl(CSIDL_DESKTOP, IID_PPV_ARGS(&psfDesktop));
if (SUCCEEDED(hr)) {
IEnumIDList *peidl;
hr = psfDesktop->EnumObjects(NULL, SHCONTF_FOLDERS | SHCONTF_NONFOLDERS, &peidl);
if (hr == S_OK) {
PIDLIST_ABSOLUTE pidlItem;
while (NOERROR == peidl->Next(1, &pidlItem, NULL)) {
STRRET display_name = {0};
HRESULT hr = psfDesktop->GetDisplayNameOf(pidlItem, SIGDN_NORMALDISPLAY, &display_name);
CoTaskMemFree(pidlItem);
}
}
psfDesktop->Release();
}
CoUninitialize();
}
}
lmfit(https://lmfit.github.io/lmfit-py/,披露:我是第一作者)对此很有帮助。它提供了一个易于使用的 Model 类,用于对数据进行建模,包括用于高斯、Voigt 和类似线形的内置模型,可轻松比较模型函数。
使用 lmfit 拟合三个高斯峰并打印拟合报告的示例如下:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from lmfit.models import GaussianModel
# Single-peak values (amplitude, centre, sigma judging by the names); they are
# not referenced anywhere else in the visible code.
amp1 = 1
cen1 = 1
sigma1 = 0.05
# Frame whose index is a 1000-point x grid over [0, 10]. The 'int' column is
# declared here but never filled in the visible code.
df=pd.DataFrame(index=np.linspace(0,10,num=1000),columns=['int'])
def _ngaussian(x, amps,cens,sigmas):
fn = 0
if len(amps)== len(cens)== len(sigmas):
for i in range(len(amps)):
fn = fn+amps[i]*(1/(sigmas[i]*(np.sqrt(2*np.pi))))*\
(np.exp((-1.0/2.0)*(((x-cens[i])/sigmas[i])**2)))
fn = fn+np.random.normal(size=len(x), scale=0.05)
else:
print('Your inputs have unequal lengths')
return fn
# Ground-truth parameters for three peaks: one entry per peak in each list.
amps = [1.30, 0.92, 2.11]
cens = [1.10, 1.73, 2.06]
sigmas=[0.05, 0.09, 0.07]
# Grouped as [amps, cens, sigmas] so the list can be star-unpacked below.
popt_peaks = [amps,cens,sigmas]
# Synthetic data on the frame's x grid; this version of _ngaussian adds
# random noise itself, so the column is not reproducible between runs.
df['peaks'] = _ngaussian(df.index, *popt_peaks)
# create a model with 3 Gaussians: pretty easy to generalize
# to a loop to make N peaks
# Each component gets its own prefix so its parameter names stay distinct
# (p1_center, p2_center, ...).
model = (GaussianModel(prefix='p1_') +
GaussianModel(prefix='p2_') +
GaussianModel(prefix='p3_') )
# create Parameters (named from function arguments). For
# Gaussian, Lorentzian, Voigt, etc these are "center", "amplitude", "sigma"
# The values given here are starting guesses, deliberately different from
# the parameters used to generate the data.
params = model.make_params(p1_center=1.0, p1_amplitude=2, p1_sigma=0.1,
p2_center=1.5, p2_amplitude=2, p2_sigma=0.1,
p3_center=2.0, p3_amplitude=2, p3_sigma=0.1)
# Parameters can have min/max bounds, be fixed (`.vary = False`)
# or constrained to a mathematical expression of other Parameter values
# Here each peak centre is confined to its own (partly overlapping) window.
params['p1_center'].min = 0.8
params['p1_center'].max = 1.5
params['p2_center'].min = 1.1
params['p2_center'].max = 1.9
params['p3_center'].min = 1.88
params['p3_center'].max = 3.00
# run the fit
# The frame's index supplies the independent variable x.
result = model.fit(df['peaks'], params, x=df.index)
# print out the fit results
print(result.fit_report())
# plot results
# Data as markers, best-fit curve as a line, x axis limited to [0, 3].
plt.plot(df.index, df['peaks'], 'o', label='data')
plt.plot(df.index, result.best_fit, '-', label='fit')
plt.legend()
plt.gca().set_xlim(0, 3)
plt.show()