我正在使用 scikit-learn 的 train_test_split 函数。除 X_test 之外,其余返回的数组看起来都正常;而 X_test 打印出来的是一些奇怪的值,看上去不像原始数据集。代码如下。
import numpy as np
from sklearn.model_selection import train_test_split

# Load the comma-separated file into a single float array (indexed below as 2-D).
heart_data = np.genfromtxt("cleveland.txt", delimiter=",")

# Column 13 is used as the target. The feature matrix is sliced out separately
# so the label is not also passed to the model as an input column (target
# leakage), which is what happened when the full array was used as X.
hd_X = heart_data[:, :13]
hd_y = heart_data[:, 13]

# Hold out 20% of the rows for testing; random_state pins the shuffle so the
# split is reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    hd_X, hd_y, test_size=0.2, random_state=42
)

# NOTE: NumPy picks one display format per printed array. When the values it
# formats span a wide range it may switch to scientific notation (for example
# 5.30e+01 for 53.0). This is purely a display choice; the stored numbers are
# unchanged. Call np.set_printoptions(suppress=True) before printing to always
# get fixed-point output.
print(X_train, y_train)
print(X_test, y_test)
输出如下(数组很长,这里做了截断):
#X_train
[[58. 1. 3. ... 0. 7. 0.]
[50. 0. 3. ... 0. 3. 0.]
[70. 1. 4. ... 0. 7. 4.]
...
[61. 1. 4. ... 1. 7. 2.]
[35. 1. 2. ... 0. 3. 0.]
[49. 1. 3. ... 3. 7. 3.]]
#y_train
[0. 0. 4. 2. 1. 3. 0. 0. 0. 0. 0. 2. 3. 1. 1. 0. 0. 2. 0. 0. 0. 0. 2. 4.
0. 0. 0. 1. 1. 0. 0. 2. 1. 2. 1. 3. 2. 0. 1. 0. 1. 2. 2. 2. 1. 3. 0. 1
0. 1. 0. 0. 1. 1. 1. 0. 0. 3. 2. 0. 3.]
#X_test
[[5.30e+01 1.00e+00 4.00e+00 1.40e+02 2.03e+02 1.00e+00 2.00e+00 1.55e+02
1.00e+00 3.10e+00 3.00e+00 0.00e+00 7.00e+00 1.00e+00]
[4.00e+01 1.00e+00 4.00e+00 1.52e+02 2.23e+02 0.00e+00 0.00e+00 1.81e+02
0.00e+00 0.00e+00 1.00e+00 0.00e+00 7.00e+00 1.00e+00]
[5.10e+01 0.00e+00 4.00e+00 1.30e+02 3.05e+02 0.00e+00 0.00e+00 1.42e+02
1.00e+00 1.20e+00 2.00e+00 0.00e+00 7.00e+00 2.00e+00]]
#y_test
[1. 1. 0. 0. 0. 3. 0. 1. 1. 0. 1. 0. 0. 0. 0. 0. 3. 2. 4. 3. 0. 1. 0. 0.
0. 1. 1. 0. 2. 0. 0. 0. 3. 0. 0. 3. 0. 4. 1. 0. 0. 0. 1. 4. 3. 2. 0. 0.
0. 1. 1. 0. 0. 3. 3. 0. 0. 2.]