Question

我有一堆代表S形函数的x，y点：

x=[ 1.00094909  1.08787635  1.17481363  1.2617564   1.34867881  1.43562284
  1.52259341  1.609522    1.69631283  1.78276102  1.86426648  1.92896789
  1.9464453   1.94941586  2.00062852  2.073691    2.14982808  2.22808316
  2.30634034  2.38456905  2.46280126  2.54106611  2.6193345   2.69748825]
y=[-0.10057627 -0.10172142 -0.10320428 -0.10378959 -0.10348456 -0.10312503
 -0.10276956 -0.10170055 -0.09778279 -0.08608644 -0.05797392  0.00063599
  0.08732999  0.16429878  0.2223306   0.25368884  0.26830932  0.27313931
  0.27308756  0.27048902  0.26626313  0.26139534  0.25634544  0.2509893 ]

我使用scipy.interpolate.UnivariateSpline()来拟合某些三次样条，如下所示：

from scipy.interpolate import UnivariateSpline
s = UnivariateSpline(x, y, k=3, s=0)

xfit = np.linspace(x.min(), x.max(), 200)
plt.scatter(x,y)
plt.plot(xfit, s(xfit))
plt.show()

这就是我得到的：

由于我指定了s=0，所以样条曲线完全附着在数据上，但是摆动太多。使用较高的k值会导致更多的摆动。

所以我的问题是-

如何正确使用scipy.interpolate.UnivariateSpline()来适合我的数据？更准确地说，如何使样条曲线的摆动最小化？
对于这种S型功能，这是否是正确的选择？我是否应该将scipy.optimize.curve_fit()之类的东西与试用版tanh(x)函数一起使用？

Answer 1

有几个选项，下面列出几个。最后一个似乎提供了最佳输出。是否应使用样条曲线或实际函数取决于对输出的处理方式。我在下面列出了两个可以使用的分析函数，但是我不知道数据是在哪种上下文中得出的，因此很难为您找到最佳的函数。

您可以与s一起玩，例如对于s=0.005，情节看起来像这样（虽然还不是很漂亮，但是您可以进一步调整）：

但是我确实会使用“适当的”功能并使用例如curve_fit。下面的函数仍然不是理想的，因为它是单调递增的，因此我们错过了最后的减小；该图如下所示：

这是整个代码，适用于样条曲线和实际拟合：

from scipy.interpolate import UnivariateSpline
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import curve_fit


def func(x, ymax, n, k, c):
    return ymax * x ** n / (k ** n + x ** n) + c

x=np.array([ 1.00094909,  1.08787635,  1.17481363,  1.2617564,   1.34867881,  1.43562284,
  1.52259341,  1.609522,    1.69631283,  1.78276102,  1.86426648,  1.92896789,
  1.9464453,   1.94941586,  2.00062852,  2.073691,    2.14982808,  2.22808316,
  2.30634034,  2.38456905,  2.46280126,  2.54106611,  2.6193345,   2.69748825])
y=np.array([-0.10057627, -0.10172142, -0.10320428, -0.10378959, -0.10348456, -0.10312503,
 -0.10276956, -0.10170055, -0.09778279, -0.08608644, -0.05797392,  0.00063599,
  0.08732999,  0.16429878,  0.2223306,   0.25368884,  0.26830932,  0.27313931,
  0.27308756,  0.27048902,  0.26626313,  0.26139534,  0.25634544,  0.2509893 ])


popt, pcov = curve_fit(func, x, y, p0=[y.max(), 2, 2, -0.1], bounds=([0, 0, 0, -0.2], [0.4, 45, 2000, 10]))
xfit = np.linspace(x.min(), x.max(), 200)
plt.scatter(x, y)
plt.plot(xfit, func(xfit, *popt))
plt.show()

s = UnivariateSpline(x, y, k=3, s=0.005)

xfit = np.linspace(x.min(), x.max(), 200)
plt.scatter(x, y)
plt.plot(xfit, s(xfit))
plt.show()

第三个选择是使用更高级的功能，该功能还可以再现结尾处的减小和differential_evolution的适合度；似乎最适合：

代码如下（使用与上面相同的数据）：

from scipy.optimize import curve_fit, differential_evolution    

def sigmoid_with_decay(x, a, b, c, d, e, f):

    return a * (1. / (1. + np.exp(-b * (x - c)))) * (1. / (1. + np.exp(d * (x - e)))) + f

def error_sigmoid_with_decay(parameters, x_data, y_data):

    return np.sum((y_data - sigmoid_with_decay(x_data, *parameters)) ** 2)

res = differential_evolution(error_sigmoid_with_decay,
                             bounds=[(0, 10), (0, 25), (0, 10), (0, 10), (0, 10), (-1, 0.1)],
                             args=(x, y),
                             seed=42)

xfit = np.linspace(x.min(), x.max(), 200)
plt.scatter(x, y)
plt.plot(xfit, sigmoid_with_decay(xfit, *res.x))
plt.show()

拟合度对边界非常敏感，因此在使用该拟合度时要小心...

Answer 2

这说明了将数据的两半拟合为不同的函数的结果，下半部分适用于X <2.0的所有数据，上半部分适用于X> = 1.9的所有数据，因此对于拟合曲线。代码在重叠区域X = 1.95的中心从一个方程式切换到另一个方程式。

import numpy, matplotlib
import matplotlib.pyplot as plt

xData=numpy.array([ 1.00094909,  1.08787635,  1.17481363,  1.2617564,   1.34867881,  1.43562284,
  1.52259341,  1.609522,    1.69631283,  1.78276102,  1.86426648,  1.92896789,
  1.9464453,   1.94941586,  2.00062852,  2.073691,    2.14982808,  2.22808316,
  2.30634034,  2.38456905,  2.46280126,  2.54106611,  2.6193345,   2.69748825])
yData=numpy.array([-0.10057627, -0.10172142, -0.10320428, -0.10378959, -0.10348456, -0.10312503,
 -0.10276956, -0.10170055, -0.09778279, -0.08608644, -0.05797392,  0.00063599,
  0.08732999,  0.16429878,  0.2223306,   0.25368884,  0.26830932,  0.27313931,
  0.27308756,  0.27048902,  0.26626313,  0.26139534,  0.25634544,  0.2509893 ])


# function for x < 1.95 (fitted up to 2.0 for overlap)
def lowerFunc(x_in): # Bleasdale-Nelder Power With Offset
    # coefficients
    a = -1.1431476643503597E+03
    b = 3.3819340844164983E+21
    c = -6.3633178925040745E+01
    d = 3.1481973843740194E+00
    Offset = -1.0300724909782859E-01

    temp = numpy.power(a + b * numpy.power(x_in, c), -1.0 / d)
    temp += Offset
    return temp

# function for x >= 1.95 (fitted down to 1.9 for overlap)
def upperFunc(x_in): # rational equation with Offset
    # coefficients
    a = -2.5294212380048242E-01
    b = 1.4262697377369586E+00
    c = -2.6141935706529118E-01
    d = -8.8730045918252121E-02
    Offset = -4.8283287597672708E-01

    temp = (a * numpy.power(x_in, 2) + b * numpy.log(x_in)) # numerator
    temp /= (1.0 + c * numpy.power(numpy.log(x_in), -1) + d * numpy.exp(x_in)) # denominator
    temp += Offset
    return temp


def combinedFunc(x_in):
    returnVal = []
    for x in x_in:
        if x < 1.95:
            returnVal.append(lowerFunc(x))
        else:
            returnVal.append(upperFunc(x))
    return returnVal


modelPredictions = combinedFunc(xData) 

absError = modelPredictions - yData

SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))
print('RMSE:', RMSE)
print('R-squared:', Rsquared)


##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)

    # first the raw data as a scatter plot
    axes.plot(xData, yData,  'D')

    # create data for the fitted equation plot
    xModel = numpy.linspace(min(xData), max(xData))
    yModel = combinedFunc(xModel)

    # now the model as a line plot
    axes.plot(xModel, yModel)

    axes.set_xlabel('X Data') # X axis data label
    axes.set_ylabel('Y Data') # Y axis data label

    plt.show()
    plt.close('all') # clean up after using pyplot

graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)

使用UnivariateSpline紧密拟合数据

2 个答案: