在对两个数组运行交叉验证预测(cross_val_predict)时出现了上述错误。我查看了之前有关此错误的帖子,但仍未能解决问题。非常感谢任何帮助。
# Target column: did the customer churn?
Y_col = ['recent_cus']
# Selecting with a list of column names keeps the result two-dimensional,
# so Y is an (n, 1) column vector. `.values` is used instead of
# `as_matrix()`, which was deprecated and then removed in pandas 1.0;
# it returns the same NumPy array.
Y = data[Y_col].values
# Feature matrix as a plain NumPy array.
X = X_data.values
display(X)
display(Y)
array([[ 9., 2., 0., ..., 2., 2., 0.],
[ 7., 0., 0., ..., 0., 4., 0.],
[ 9., 0., 0., ..., 0., 2., 0.],
...,
[ 9., 2., 0., ..., 2., 2., 0.],
[ 4., 0., 0., ..., 0., 8., 0.],
[ 7., 0., 0., ..., 2., 4., 0.]])
array([[0],
[0],
[0],
...,
[0],
[0],
[0]], dtype=int64)
# 'balanced' is the replacement for the 'auto' class-weight mode, which was
# deprecated and is no longer accepted by current scikit-learn releases.
clf = LogisticRegression(class_weight='balanced')
# Y may be an (n, 1) column vector; scikit-learn expects a 1-D target of
# shape (n,) for single-output classification. ravel() is a no-op when Y is
# already one-dimensional.
y_true = Y.ravel()
# Out-of-fold predictions from 10-fold cross-validation.
predicted = cross_val_predict(clf, X, y_true, cv=10)
fig, ax = plt.subplots()
ax.scatter(y_true, predicted)
# Dashed diagonal marks where the prediction equals the measured value.
y_lo, y_hi = y_true.min(), y_true.max()
ax.plot([y_lo, y_hi], [y_lo, y_hi], 'k--', lw=4)
ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')
plt.show()