对于此数据集,答案应为2和4,但答案都是0。我也尝试了p = fnn.activateOnDataset(trndata),但这给了我一个全纳。我相信有些东西不能与testOnClassData一起使用,但我不确定。有人可以帮忙吗?
from pybrain.datasets import ClassificationDataSet
from pybrain.utilities import percentError
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
import pandas as pd
from sklearn.metrics import mean_squared_error as MSE
df = pd.read_csv("breast-cancer-wisconsin.csv", header = 0)
num_attributes = df.shape[1] -1
x_train = df[range(1,num_attributes)] #disinclude the id (code) number
y_train = df[[num_attributes]]
input_size = num_attributes
target_size =1
alldata = ClassificationDataSet(input_size, target_size, nb_classes=2, class_labels=[2,4])
alldata.setField('input', x_train)
alldata.setField('target', y_train)
tstdata, trndata = alldata.splitWithProportion( 0.25 )
fnn = buildNetwork(trndata.indim, 5, trndata.outdim, bias=True)# recurrent=False)
trainer = BackpropTrainer( fnn, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01 )
trainer.train()
print "Test data: "
print tstdata
answers = trainer.testOnClassData(dataset=tstdata)
perc_err = percentError(answers, tstdata['target'])