在 Python 中使用线性回归拟合 2D 数据

时间:2018-10-25 18:57:47

标签: python machine-learning linear-regression

我写了一个函数 polyfit,希望它接收 x 和 y 数据,并使用线性回归返回拟合这些数据的二维曲线。我得到的结果很好,但好得有些不真实,我不确定自己的实现从头到尾是否都正确。

# Build a small noisy sine sample and show it as a scatter plot.
np.random.seed(0)  # fixed seed so the noise is the same on every run
N = 10  # number of data points
x = np.linspace(0, 2 * np.pi, N)
noise = np.random.normal(0, .3, x.shape)
y = np.sin(x) + noise

plt.figure()
plt.plot(x, y, 'o')
plt.title('2D data (#data = %d)' % N)
plt.xlabel('x')
plt.ylabel('y')
plt.show()


def polyfit(x, y, degree, delta):
    """Fit a polynomial of the given degree to (x, y) by least squares.

    The design matrix contains only the powers ``x**0 .. x**degree``.  The
    target ``y`` is deliberately NOT used as a feature: including it lets the
    solver reproduce ``y`` trivially and makes the fit look perfect.

    Parameters
    ----------
    x, y : array-like
        One-dimensional samples of equal length.
    degree : int
        Non-negative degree of the polynomial to fit.
    delta : any
        Accepted so existing calls keep working, but ignored.
        NOTE(review): the previous implementation overwrote this argument
        before reading it, so its intended meaning is unknown -- confirm
        with the author before giving it a role (e.g. regularisation).

    Returns
    -------
    numpy.ndarray
        Fitted values of the polynomial evaluated at ``x`` (same shape
        as ``x``).

    Raises
    ------
    ValueError
        If ``x`` and ``y`` are not 1-D arrays of the same length, or if
        ``degree`` is negative.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    if x.ndim != 1 or x.shape != y.shape:
        raise ValueError("x and y must be 1-D arrays of the same length")
    if degree < 0:
        raise ValueError("degree must be non-negative")

    # Columns are 1, x, x**2, ..., x**degree.
    X = np.vander(x, degree + 1, increasing=True)

    # Solve the least-squares problem directly instead of inverting X^T X,
    # which is numerically fragile for higher degrees.
    theta = np.linalg.lstsq(X, y, rcond=None)[0]

    return X.dot(theta)

# Run the fit (degree 2, delta 2), subtract its return value from y, and
# draw the difference with the 'go--' format string.
result = polyfit(x, y, degree=2, delta=2)
fin = y - result
plt.plot(x, fin, 'go--')

数据图像:

data Image

拟合线的结果:

result of the fitted line

1 个答案:

答案 0 :(得分:0)

下面是一个多项式拟合示例:使用 numpy 的 polyfit() 进行拟合,使用 numpy 的 polyval() 进行模型预测,并计算 RMSE 和 R 平方值。

import numpy, scipy, matplotlib
import matplotlib.pyplot as plt

# Sample observations used for the fit.
xData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.0, 6.6, 7.7, 0.0])
yData = numpy.array([1.1, 20.2, 30.3, 40.4, 50.0, 60.6, 70.7, 0.1])

polynomialOrder = 2  # quadratic model

# Least-squares polynomial coefficients for the sample data.
fittedParameters = numpy.polyfit(xData, yData, polynomialOrder)
print('Fitted Parameters:', fittedParameters)

# Evaluate the fitted polynomial at the observed x values; despite its name,
# absError holds the signed residuals (prediction minus observation).
modelPredictions = numpy.polyval(fittedParameters, xData)
absError = numpy.subtract(modelPredictions, yData)

SE = absError ** 2  # squared errors
MSE = SE.mean()  # mean of the squared errors
RMSE = numpy.sqrt(MSE)  # root mean squared error
Rsquared = 1.0 - absError.var() / yData.var()
print('RMSE:', RMSE)
print('R-squared:', Rsquared)

print()


##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    """Show the module-level data as a scatter plot with the fitted curve.

    graphWidth and graphHeight are divided by 100.0 to get the figure size
    passed to matplotlib, with dpi fixed at 100.  Reads the module-level
    xData, yData and fittedParameters.
    """
    size = (graphWidth / 100.0, graphHeight / 100.0)
    figure = plt.figure(figsize=size, dpi=100)
    ax = figure.add_subplot(111)

    # Raw observations, drawn with the 'D' format string.
    ax.plot(xData, yData, 'D')

    # Evaluate the fitted polynomial across the observed x range.
    lowest, highest = min(xData), max(xData)
    xModel = numpy.linspace(lowest, highest)
    ax.plot(xModel, numpy.polyval(fittedParameters, xModel))

    ax.set_xlabel('X Data')
    ax.set_ylabel('Y Data')

    plt.show()
    plt.close('all')  # release every pyplot figure once shown

# Requested figure dimensions; the plotting function divides them by 100.0.
graphWidth, graphHeight = 800, 600
ModelAndScatterPlot(graphWidth, graphHeight)