代码如下。
from sklearn import svm,datasets
class Dataset:
def __init__(self, name):
self.name = name
def download_data(self):
if self.name == 'iris':
self.download_data = datasets.load_iris()
elif self.name == 'digits':
self.download_data = datasets.load_digits()
else:
print("Dataset Error:No name datasets.")
def generate_xy(self):
self.download_data()
x = self.download_data.data
y = self.download_data.target
print('\nOriginal data looks like this:\n',x)
print('\nLabels looks like this:\n',y)
print('\n x:', len(x),' ',len(x[0]))
print('\n y:', len(y))
return x,y
def get_train_test_set(self, ratio):
x, y = self.generate_xy()
n_samples = len(x)
n_train = n_samples * ratio
X_train = x[:n_train]
Y_train = y[:n_train]
X_test = x[n_train:]
Y_test = y[n_train:]
return X_train, Y_train, X_test, Y_test
data = Dataset('digits')
X_train, Y_train, X_test, Y_test = data.get_train_test_set(0.8)
clf = svm.SVC()
print(clf.fit(X_train, Y_train))
test_point = X_test[12]
y_true = Y_test[12]
print(clf.predict(test_point))
print(y_true)
predict_result = []
for element in X_test:
predict_result.append(clf.predict(element))
print('predict_result.len:', len(predict_result))
print('Y_test.len', len(Y_test))
if len(predict_result) != len(Y_test):
print('something wrong with the caculation, predict_result.len:',len(predict_result), 'Y_test.len:%d',len(Y_test))
else:
cnt = 0
for i in range(0, len(predict_result), 1):
if(predict_result[i] == Y_test[i]):
cnt += 1
precision_ratio = cnt / len(predict_result)
print('precision ratio = ', precision_ratio)
编译器指出这两个地方: X_train,Y_train,X_test,Y_test = data.get_train_test_set(0.8) X_train = x [:n_train]
我想也许是因为' n_train'浮动,这是不正确的。所以我纠正了这样的代码:
X_train = x[:int(n_train)]
它仍然是错的。我不知道我错在哪里。