我的程序读取汽车数据中的 MPG(每加仑英里数)和重量两列,用二次多项式回归拟合二者的关系,并绘制拟合曲线。但正如您所见,绘制出的图形看起来不正确。
# Question script: fit a degree-2 polynomial regression of MPG on Weight
# and plot the fitted curve over the training points.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Read the whitespace-delimited text file. Column names are supplied via
# `names`, so every row of the file is treated as data.
dataframe= pd.read_table('auto_data71.txt',delim_whitespace=True,names=['MPG','Cylinder','Displacement','Horsepower','Weight','acceleration','Model year','Origin','Car Name'])
# Drop every row that has a missing value in any column.
dataframe.dropna(inplace=True)
# Keep only the two columns used by the model. The slices 4:5 and 0:1
# (rather than a single index) keep the arrays 2-D:
# column 4 is 'Weight' (feature), column 0 is 'MPG' (target).
X = dataframe.iloc[:,4:5].values
Y = dataframe.iloc[:,0:1].values
# Standardize feature and target, each with its own scaler.
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
sc_Y= StandardScaler()
X = sc_X.fit_transform(X)
Y = sc_Y.fit_transform(Y)
# Split into train and test sets (20% held out for testing).
# x_test / y_test are not used anywhere below.
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(X,Y,test_size=0.2)
# Build the model: expand x into degree-2 polynomial features, then fit an
# ordinary linear regression on the expanded features.
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
poly_reg = PolynomialFeatures(degree=2)
poly_X = poly_reg.fit_transform(x_train)
# NOTE(review): this refits the feature expander on the already-expanded
# matrix and discards the result; it looks unnecessary - confirm and remove.
poly_reg.fit(poly_X,y_train)
regressor2= LinearRegression()
regressor2.fit(poly_X,y_train)
# Plot training points (red) and the model's predictions for them (blue).
result = regressor2.predict(poly_X)
plt.scatter(x_train,y_train,color='red')
# NOTE(review): x_train is passed to plt.plot in whatever order
# train_test_split returned it; the line is drawn through the points in that
# order, which is presumably why the curve looks wrong - sort by x first.
plt.plot(x_train, result,color='blue')
plt.show()
输出如下图所示。如您所见,回归线看起来不正确。任何帮助都将不胜感激。
#auto_data.txt(part of data...)
注意:此代码仅使用 weight 和 mpg 两列。文件各列依次为:mpg、气缸数、排量、马力、重量、加速度、年份、原产地、名称。
27.0 4. 97.00 88.00 2130. 14.5 71. 3. "datsun pl510"
28.0 4. 140.0 90.00 2264. 15.5 71. 1. "chevrolet vega 2300"
25.0 4. 113.0 95.00 2228. 14.0 71. 3. "toyota corona"
25.0 4. 98.00 NA 2046. 19.0 71. 1. "ford pinto"
NA 4. 97.00 48.00 1978. 20.0 71. 2. "volkswagen super beetle 117"
19.0 6. 232.0 100.0 2634. 13.0 71. 1. "amc gremlin"
16.0 6. 225.0 105.0 3439. 15.5 71. 1. "plymouth satellite custom"
17.0 6. 250.0 100.0 3329. 15.5 71. 1. "chevrolet chevelle malibu"
19.0 6. 250.0 88.00 3302. 15.5 71. 1. "ford torino 500"
18.0 6. 232.0 100.0 3288. 15.5 71. 1. "amc matador"
14.0 8. 350.0 165.0 4209. 12.0 71. 1. "chevrolet impala"
14.0 8. 400.0 175.0 4464. 11.5 71. 1. "pontiac catalina brougham"
14.0 8. 351.0 153.0 4154. 13.5 71. 1. "ford galaxie 500"
14.0 8. 318.0 150.0 4096. 13.0 71. 1. "plymouth fury iii"
12.0 8. 383.0 180.0 4955. 11.5 71. 1. "dodge monaco (sw)"
13.0 8. 400.0 170.0 4746. 12.0 71. 1. "ford country squire (sw)"
13.0 8. 400.0 175.0 5140. 12.0 71. 1. "pontiac safari (sw)"
18.0 6. 258.0 110.0 2962. 13.5 71. 1. "amc hornet sportabout (sw)"
答案 0 :(得分:0)
在绘制之前,您需要按 x 值对数据点进行排序。plt.plot 会按照数组中的顺序依次连接各点,而 train_test_split 默认会打乱样本顺序,因此不排序的话,折线会在各点之间来回折返。
数据:https://files.fm/u/2g5dxyb4
请使用以下代码:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Fit a degree-2 polynomial regression of mpg on weight and plot the fitted
# curve over the training points.

# Load the whitespace-delimited data file. The first row is used as the
# header, and the script relies on it providing 'weight' and 'mpg' columns.
# sep=r'\s+' is the non-deprecated spelling of delim_whitespace=True.
data = pd.read_csv('data.txt', sep=r'\s+')
# Drop every row that has a missing value in any column.
data.dropna(inplace=True)

# scikit-learn expects 2-D arrays of shape (n_samples, n_features).
X = data['weight'].values.reshape(-1, 1)
Y = data['mpg'].values.reshape(-1, 1)

# Standardize feature and target, each with its own scaler.
sc_X = StandardScaler()
sc_Y = StandardScaler()
X = sc_X.fit_transform(X)
Y = sc_Y.fit_transform(Y)

# Split into train and test sets (20% held out for testing).
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)

# Expand x into degree-2 polynomial features and fit a linear model on them.
# The expander is fitted exactly once, on the raw 1-column feature, so that
# it can be reused below to transform other x values.
poly_reg = PolynomialFeatures(degree=2)
poly_X = poly_reg.fit_transform(x_train)
regressor2 = LinearRegression()
regressor2.fit(poly_X, y_train)

# plt.plot connects points in the order given, so the curve must be drawn
# over x values in ascending order. Sort the raw x first and only then build
# the polynomial features: sorting the expanded matrix column by column would
# break the pairing between x and x**2, because x**2 is not monotonic in x
# once x has been standardized and takes negative values.
x_sorted = np.sort(x_train, axis=0)
result = regressor2.predict(poly_reg.transform(x_sorted))

plt.scatter(x_train, y_train, color='red')
plt.plot(x_sorted, result, color='blue')
plt.show()