我正在尝试使用不同的分类器训练模型。并希望在csv文件中保存每个分类器分类报告以获取类的分数值。但是当我尝试执行此操作时,出现以下错误。 我真的很感激,如果有人可以帮我解决这个问题,我可以单独测试每个分类器并保存到文件。但它会增加我真正不想要的代码。
raise ValueError("Mix type of y not allowed, got types %s" % ys_types)
ValueError: Mix type of y not allowed, got types {'multiclass', 'continuous-
multioutput'}
**this is the code am trying with:**
from sklearn.metrics import accuracy_score, log_loss
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier,
GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
,电话,BASE_ID,CELL_ID,ULTraffic,TCPSYNAtteDelay,日期
1,13548599180,54,251,469,28,20160718034300.0 2,13548599180,1,46,1273,31,20160725023800.0 3,13548599180,54,251,1273,25,20160714012400.0 4,13548599180,54,251,1273,31,20160714085200.0 5,13548599180,54,251,1273,31,20160718034200.0 6,13548599180,54,251,1273,25,20160713082200.0 7,13548599180,54,251,2373,33,20160713091800.0 8,13548599180,54,251,2373,33,20160713091800.0 9,13548599180,54,251,639,31,20160714015300.0 10,13548599180,92,278,690,28,20160704010900.0 11,13548599180,92,278,693,28,20160704010900.0 12,13548599180,92,278,689,31,20160704010900.0 13,13548599180,92,278,1060,25,20160704010900.0 14,13548599180,54,251,1339,29,20160713091500.0 15,13548599180,54,251,1202,29,20160713091500.0 16,13548599180,54,251,975,26,20160713091500.0 17,13548599180,54,251,745,29,20160714015300.0 18,13548599180,54,251,707,27,20160714015300.0
url = 'path\stackflow_sample.csv'
names = ['Phone','Base_ID', 'Cell_ID', 'ULTraffic',
'TCPSYNAtteDelay','date']
data = pd.read_csv(url, names=names,low_memory=False)
a=data.loc[:,('Phone','Base_ID', 'Cell_ID', 'ULTraffic',
'TCPSYNAtteDelay','date')]
data_dia = a.Phone ; x_1 = a.drop('Phone',axis = 1 )
x_train, x_test, y_train, y_test = train_test_split(x_1, data_dia,
test_size=0.3, random_state=42)
classifiers = [
KNeighborsClassifier(3)]
DecisionTreeClassifier()
RandomForestClassifier(),
AdaBoostClassifier(),
GradientBoostingClassifier(),
GaussianNB(),
LinearDiscriminantAnalysis()]
# Logging for Visual Comparison
log_cols = ["Classifier", "Accuracy", "Log Loss"]
log = pd.DataFrame(columns=log_cols)
for clf in classifiers:
clf.fit(x_train, y_train)
name = clf.__class__.__name__
print("=" * 30)
print(name)
print('****Results****')
train_predictions = clf.predict(x_test)
acc = accuracy_score(y_test, train_predictions)
print("Accuracy: {:.4%}".format(acc))
train_predictions = clf.predict_proba(x_test)
ll = log_loss(y_test, train_predictions)
print("Log Loss: {}".format(ll))
log_entry = pd.DataFrame([[name, acc * 100, ll]], columns=log_cols)
log = log.append(log_entry)
report = classification_report(y_test, train_predictions)
print("log:",log)
print("=" * 30)
**This is the function am using to save classification_report results of
classifiers in other file func.py **
def classifaction_report_csv(report):
report_data = []
lines = report.split('\n')
for line in lines[2:-3]:
row = {}
row_data = line.split(' ')
row['class'] = row_data[0]
row['precision'] = float(row_data[1])
row['recall'] = float(row_data[2])
row['f1_score'] = float(row_data[3])
row['support'] = float(row_data[4])
report_data.append(row)
dataframe = pd.DataFrame.from_dict(report_data)
dataframe.to_csv('classification_report.csv', index = False)