我有一些带噪声的数据,想找出其二阶导数的最大值。我正在比较两种方法:(1) 先用滚动平均值对数据进行平滑处理,再求二阶导数;(2) 使用这个回答中的样条方法:Gradient in noisy data, python。
不过,我从SciPy样条曲线获得的结果非常不正确。如何更改参数以获得更好的结果?
图 1:使用滚动平均值和 np.gradient 的示例。图 2:使用 SciPy 样条方法的示例。
完整的程序放在这个 Colab 笔记本中:https://github.com/leoUninova/for-stack-excahgne/blob/master/Derivative_fit.ipynb
这是我的数据集的前几行(head):
VG absID Name
1520 -2.00 1.264000e-11 20-10-300-350-0.00032-2e-05
1521 -1.95 1.246200e-11 20-10-300-350-0.00032-2e-05
1522 -1.90 9.462000e-12 20-10-300-350-0.00032-2e-05
1523 -1.85 1.198000e-11 20-10-300-350-0.00032-2e-05
1524 -1.80 1.201800e-11 20-10-300-350-0.00032-2e-05
这是代码
#@title derivative plot functions
#importing packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import splrep, splev
#plot functions
def plotvtlin(data):
    """Plot ``absID`` against ``VG`` with a rolling-mean second derivative.

    The ``absID`` column is smoothed with a 5-sample rolling mean (pandas'
    default, non-centred window), then differentiated twice with respect to
    ``VG`` using ``numpy.gradient``.  The ``VG`` value at which that second
    derivative is largest is marked with a vertical line.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame providing the columns ``VG`` and ``absID``.  Rows with a
        repeated ``VG`` are removed from this frame IN PLACE (first
        occurrence kept), so the caller's object is modified.

    Returns
    -------
    str or None
        ``'end'`` when ``data`` is empty; otherwise ``None`` after the
        figure has been shown.
    """
    if data.empty:
        print('passed empty dataframe for stab')
        return 'end'  # nothing to plot

    # In-place de-duplication on VG: the caller sees the reduced frame too.
    # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.drop_duplicates.html
    data.drop_duplicates(subset='VG', keep='first', inplace=True)

    vg = data.VG
    # The first window-1 entries of the rolling mean are NaN.
    id_smooth = data.absID.rolling(window=5).mean()

    # Differentiate twice with respect to VG.
    d1 = np.gradient(id_smooth, vg)
    d2 = np.gradient(d1, vg)

    # nanargmax skips the NaNs introduced by the rolling window.
    peak_pos = np.nanargmax(d2)
    VT = vg.iloc[peak_pos]

    fig, curve_axis = plt.subplots(figsize=(6, 4))
    deriv_axis = curve_axis.twinx()  # second y-axis sharing the same x-axis
    curve_axis.plot(vg, data.absID)
    curve_axis.axvline(VT)
    deriv_axis.plot(vg, d2, linestyle='--', color='red')
    plt.show()
def univariatefunction(data, K, S):
    """Plot the data and the second derivative of a smoothing spline fit.

    Spline-based gradient for noisy data, following
    https://stackoverflow.com/questions/15862066/gradient-in-noisy-data-python

    Parameters
    ----------
    data : pandas.DataFrame
        Frame providing the columns ``VG`` (x values) and ``absID``
        (y values).
    K : int
        Spline degree, forwarded to ``splrep`` as ``k`` (1 <= K <= 5).
        Must be at least 2 because the second derivative is evaluated.
    S : float
        Smoothing condition, forwarded to ``splrep`` as ``s``.  ``splrep``
        bounds the sum of squared residuals by this value, so it is
        expressed in squared ``absID`` units.
        NOTE(review): for ``absID`` values around 1e-11 a setting such as
        ``S=5`` places practically no constraint on the fit; scale ``S``
        to the data (or rescale the data) when tuning -- confirm against
        the real data set.

    Returns
    -------
    str or None
        ``'end'`` when ``data`` is empty (same convention as
        ``plotvtlin``); otherwise ``None`` after the figure has been shown.
    """
    if data.empty:
        print('passed empty dataframe for stab')
        return 'end'  # nothing to fit or plot

    x = np.array(data.VG)
    noisy_data = np.array(data.absID)

    # Honour the caller's degree and smoothing settings; they were
    # previously ignored in favour of hard-coded k=5, s=3.
    f = splrep(x, noisy_data, k=K, s=S)

    fig, ax = plt.subplots(figsize=(6, 4))
    ax2 = ax.twinx()  # second y-axis for the derivative
    ax.plot(x, noisy_data, label="noisy data")
    ax2.plot(x, splev(x, f, der=2), label="2nd derivative",
             linestyle='--', color='red')
    plt.hlines(0, 0, 2)
    plt.legend(loc=0)
    plt.show()
#@title derivative plots main
# Load the data set and keep only the rows whose Curve is 'Transfer stab'.
url = 'https://raw.githubusercontent.com/leoUninova/for-stack-excahgne/master/one.csv'
df1 = pd.read_csv(url)
df1 = df1[df1.Curve == 'Transfer stab']

# Make the data set smaller so it is easier to plot: first ten unique names.
names = list(df1.Name.unique()[:10])
df1 = df1[df1['Name'].isin(names)]

for name in names:
    # plotvtlin() drops duplicate VG rows in place, so give it an
    # independent copy rather than a slice of df1 (this avoids pandas'
    # SettingWithCopyWarning).  The same de-duplicated frame is then
    # passed on to the spline version.
    df2 = df1.loc[df1.Name == name].copy()
    plotvtlin(df2)
    univariatefunction(df2, K=3, S=5)