在对数据进行向量化之前,必须先将数据拆分为训练集和测试集,以避免数据泄漏问题。
from sklearn.model_selection import train_test_split

# Split the raw data into train and test partitions before any vectorizer is
# fitted, so nothing learned from the test rows can leak into training.
#
# NOTE(review): X is built from every column of project_data, which includes
# 'project_is_approved' itself -- confirm the label column is excluded before
# a model is fitted on X_train.
X = project_data.values
y = project_data['project_is_approved'].values

# Hold out 30% of the rows for testing. `stratify` must be the array of class
# labels (not a number) so that both partitions keep the class proportions of
# the full data set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.30
)