正如标题所说,我在绘制多元线性回归(MLR)时遇到了困难。只有 2 个变量时我可以画出来,但变量再多就要么报错,要么画出极其难看的图。我应该如何绘制它?笔记本的输出位于虚线之间。
# Load the spreadsheet 'data.xls' into a DataFrame named `data`.
import pandas as pd
data = pd.read_excel('data.xls')
# Preview the first rows; as the last expression of a notebook cell the
# result is rendered as the cell's output.
data.head()
---------------------------------------
Protein Fat B SNF Cells
0 3.01 3.44 7.76 313
1 3.18 4.14 8.57 673
2 3.59 4.13 9.06 18
3 3.35 3.86 8.47 34
4 3.41 3.81 8.60 42
---------------------------------------
# Estimator and splitting helper used by this cell.
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Three predictor columns and the 'Protein' column as the regression target.
X = data[['Fat B', 'SNF', 'Cells']]
Y = data['Protein']

# Hold out 30% of the rows for evaluation; the fixed seed makes the split
# reproducible between runs.
X_train, x_test, Y_train, y_test = train_test_split(
    X, Y, test_size=0.3, random_state=0
)

# fit() returns the fitted estimator, so construction and fitting can be chained.
reg = LinearRegression().fit(X_train, Y_train)
y_pred = reg.predict(x_test)

# Show the held-out targets and the model's predictions side by side.
print(f"{y_test} - {y_pred}")
---------------------------------------------------------------------------------------------------------------------
16930 3.45
13161 3.01
20475 2.73
11770 3.37
24860 3.55
...
16611 2.85
22789 3.46
12734 3.07
24144 3.13
21275 3.20
Name: Protein, Length: 9585, dtype: float64 - [3.35031197 3.24901896 2.98839754 ... 3.16469215 3.12187003 3.2994102 ]
---------------------------------------------------------------------------------------------------------------------
from sklearn.metrics import r2_score

# Coefficient of determination of the predictions against the held-out targets.
print(r2_score(y_true=y_test, y_pred=y_pred))
--------------------
0.7017923484495309
--------------------