Question

我有一些带噪声的数据，想找出其二阶导数取最大值的位置。我正在比较两种方法：(1) 先用滚动平均值对数据进行平滑处理，再求二阶导数；(2) 使用此答案中介绍的样条方法：Gradient in noisy data, python。

不过，我从SciPy样条曲线获得的结果非常不正确。如何更改参数以获得更好的结果？

图 1：使用滚动平均值和 np.gradient 的示例；图 2：使用 SciPy 样条法的示例

完整的程序我放在了一个 Colab 笔记本中：https://github.com/leoUninova/for-stack-excahgne/blob/master/Derivative_fit.ipynb

这是我的数据集的前几行（head）：

        VG         absID                         Name
1520 -2.00  1.264000e-11  20-10-300-350-0.00032-2e-05
1521 -1.95  1.246200e-11  20-10-300-350-0.00032-2e-05
1522 -1.90  9.462000e-12  20-10-300-350-0.00032-2e-05
1523 -1.85  1.198000e-11  20-10-300-350-0.00032-2e-05
1524 -1.80  1.201800e-11  20-10-300-350-0.00032-2e-05

这是代码

#@title derivative plot functions 
#importing packages 
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
from scipy.interpolate import splrep, splev



#plot functions
def plotvtlin(data, window=5):
  '''Plot absID against VG and mark the VG where the second derivative peaks.

  The absID column is smoothed with a rolling mean and then differentiated
  twice with numpy.gradient, using the VG column as sample coordinates.

  Args:
    data: DataFrame with numeric 'VG' and 'absID' columns.
    window: Number of samples in the rolling-mean window. Defaults to 5,
      the value that used to be hard-coded.

  Returns:
    'end' when nothing can be plotted (empty frame, or too few distinct VG
    points for the smoothed second derivative to have a finite value);
    otherwise None, after the figure has been shown.

  Side effects:
    Rows with a repeated VG are dropped from `data` in place (the first
    occurrence is kept), so the frame passed by the caller is modified.
  '''
  if data.empty:
    print ('passed empty dataframe for stab')
    return ('end') # nothing to plot; return early
  # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.drop_duplicates.html
  data.drop_duplicates(subset='VG', keep='first', inplace=True)

  # np.gradient raises on fewer than two samples, so bail out first.
  if len(data) < 2:
    print('not enough distinct VG points for a second derivative')
    return 'end'

  ID_roll = data.absID.rolling(window=window).mean()
  # derivatives (the leading NaNs produced by the rolling mean propagate)
  grad_roll = np.gradient(ID_roll, data.VG)
  grad_roll2 = np.gradient(grad_roll, data.VG)

  # np.nanargmax raises ValueError on an all-NaN input, which happens when
  # the frame is too short for the chosen window.
  if np.isnan(grad_roll2).all():
    print('not enough distinct VG points for a second derivative')
    return 'end'

  # VG value where second derivative is max
  VT = data.VG.iloc[np.nanargmax(grad_roll2)]

  fig, ax = plt.subplots(figsize=(6, 4))
  ax2 = ax.twinx()

  ax.plot(data.VG, data.absID)
  ax.axvline(VT)

  ax2.plot(data.VG, grad_roll2, linestyle='--', color='red')
  plt.show()

def univariatefunction (data, K, S):
  '''Plot noisy absID data and the second derivative of a smoothing spline.

  Spline approach from
  https://stackoverflow.com/questions/15862066/gradient-in-noisy-data-python

  Args:
    data: Object exposing 'VG' (x values) and 'absID' (y values) attributes,
      e.g. a DataFrame with those columns.
    K: Degree of the spline, forwarded to splrep as `k`.
    S: Smoothing condition, forwarded to splrep as `s`. Per the SciPy
      documentation this bounds the sum of squared residuals in the units
      of absID squared, so it is an absolute quantity. NOTE(review): for
      data of order 1e-11, as in the sample rows, a value of order 1 leaves
      the fit essentially unconstrained by the data; S likely needs to be
      many orders of magnitude smaller (or absID rescaled) - confirm
      against the real data.
  '''
  x = np.array(data.VG)
  noisy_data = np.array(data.absID)
  fig, ax = plt.subplots(figsize=(6, 4))
  ax2 = ax.twinx()
  # Use the caller's K and S; previously they were ignored in favour of
  # hard-coded k=5, s=3.
  f = splrep(x, noisy_data, k=K, s=S)
  ax.plot(x, noisy_data, label="noisy data")
  ax2.plot(x, splev(x, f, der=2), label="2nd derivative", linestyle='--', color='red')
  plt.hlines(0, 0, 2)
  plt.legend(loc=0)
  plt.show()


#@title derivative plots main
# Load the measurements and keep only the 'Transfer stab' curves.
url='https://raw.githubusercontent.com/leoUninova/for-stack-excahgne/master/one.csv'
df1=pd.read_csv(url)
df1 = df1[df1.Curve == 'Transfer stab']

# Keep only the first ten distinct names so there are fewer plots.
names=list(df1.Name.unique()[:10])
df1 = df1[df1['Name'].isin(names)]


# Draw both derivative plots for each name. The loop index was never used,
# so iterate over the names directly.
for name in names:
  df2=df1.loc[df1.Name==name]
  plotvtlin(df2)
  univariatefunction (df2, K=3, S=5)

如何计算嘈杂数据集的导数？

0 个答案: