下面附上我的 Python 代码、数据和报错信息。我想用分层随机抽样(StratifiedShuffleSplit)的方法来划分数据,但运行时出现了错误。我参照的是这里提到的方法,请告诉我程序哪里有错。
from sklearn.model_selection import StratifiedShuffleSplit
import pandas as pd
# Load the review data and keep at most the first 47 columns
# (3 descriptive columns followed by the C1..C44 label columns in the sample).
data = pd.read_csv('strat.csv')
data = data[data.columns[0:47]]

# The first three columns are the inputs; get_dummies one-hot encodes
# the non-numeric ones so the feature frame is fully numeric.
req_f = data[data.columns[0:3]]
feature = pd.get_dummies(req_f)

# Every remaining column is treated as part of the (multi-column) target.
target = data[data.columns[3:]]

sss = StratifiedShuffleSplit(n_splits=5, test_size=0.5, random_state=42)

# NOTE(review): stratifying on a multi-column target appears to require each
# distinct label row to occur at least twice; otherwise split() raises
# ValueError ("least populated class ... has only 1 member"). Confirm against
# the scikit-learn docs and the real data, or stratify on a single column.
for train_index, test_index in sss.split(feature, target):
    x_train = feature.iloc[train_index]
    x_test = feature.iloc[test_index]
    y_train = target.iloc[train_index]
    # Labels for the test fold must come from `target`, not `feature`.
    y_test = target.iloc[test_index]
    print(x_test)
    print(y_test)
“strat.csv”的内容如下所示:
ReviewerID,ReviewText ,ProductId,C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11,C12,C13,C14,C15,C16,C17,C18,C19,C20,C21,C22,C23,C24,C25,C26,C27,C28,C29,C30,C31,C32,C33,C34,C35,C36,C37,C38,C39,C40,C41,C42,C43,C44
1212,good product,14444425,0,0,1,0,1,0,1,0,0,1,1,1,1,1,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,0,1,0,0,0,0,1,1
1233,will buy again,324532,0,0,1,0,1,0,1,0,0,1,1,1,1,1,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,0,1,0,0,0,0,1,1
5432,not recomended,789654123,0,0,1,0,1,0,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,0,1,0,0,0,0,1,1
1212,good product,14444425,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,1,1
1233,will buy again,324532,0,0,1,0,1,0,1,0,0,1,1,1,1,1,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,0,1,0,0,0,0,1,1
谢谢