以下代码所需的所有模块均已导入:
# Read the input columns from their CSV files.
date = genfromtxt('date.csv')
usage = genfromtxt('usage.csv')
test = genfromtxt('test.csv')

# Report how many entries were read from the date and usage files.
print(len(date))
print(len(usage))

# Put both columns into a single table.
dataframe = pd.DataFrame({
    'Date': date,
    'Usage': usage,
})
# Drop rows containing NaN values.
dataframe = dataframe.dropna()
print(dataframe)

# Remove the last row and the fourth-from-last row before modelling.
df = dataframe.drop(dataframe.index[[-1, -4]])
array = df.values
X = array[:, 0:1]  # first column only, kept two-dimensional
Y = array[:, 1]    # second column, one-dimensional

# Hold out 20% of the rows for validation; the fixed seed makes the split
# repeatable.
validation_size = 0.20
seed = 7
X_train, X_validation, Y_train, Y_validation = model_selection.train_test_split(
    X, Y, test_size=validation_size, random_state=seed)

# Metric name handed to cross_val_score.
scoring = 'accuracy'
# Spot-check algorithms: (label, estimator) pairs, each estimator constructed
# with its default settings.
models = [
    ('LR', LogisticRegression()),
    ('LDA', LinearDiscriminantAnalysis()),
    ('KNN', KNeighborsClassifier()),
    ('CART', DecisionTreeClassifier()),
    ('NB', GaussianNB()),
    ('SVM', SVC()),
]
# Evaluate each model in turn.
results = []
names = []

# When every training sample carries its own label, the target is effectively
# continuous: accuracy is meaningless, and LinearDiscriminantAnalysis divides
# by (n_samples - n_classes), which is then zero. Fail early with an explicit
# message instead of a ZeroDivisionError from deep inside the estimator.
n_samples = len(Y_train)
n_classes = len(set(Y_train))
if n_classes >= n_samples:
    raise ValueError(
        "Y_train has %d distinct labels for %d samples; the target looks "
        "continuous, so use regression models or bin it into classes"
        % (n_classes, n_samples))

# One splitter is shared by all models so they are scored on the same folds.
# random_state is left out: it has no effect unless shuffle=True, and newer
# scikit-learn releases reject that combination.
kfold = model_selection.KFold(n_splits=10)
for name, model in models:
    cv_results = model_selection.cross_val_score(
        model, X_train, Y_train, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    # Label, mean score and standard deviation across the folds.
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)
# Compare algorithms: one box per model, drawn from its cross-validation
# scores and labelled with the model's short name.
fig = plt.figure()
fig.suptitle('Algorithm Comparison')
ax = fig.add_subplot(111)
plt.boxplot(results)
ax.set_xticklabels(names)
plt.show()
错误讯息:
1. UserWarning: The priors do not sum to 1. Renormalizing(先验概率之和不等于 1,已重新归一化)
Traceback (most recent call last):
File "data_0.py", line 111, in <module>
2.出现错误的行:
cv_results = model_selection.cross_val_score(model, X_train, Y_train,cv=kfold, scoring=scoring)
File “/Users/nelsondsouza/anaconda/lib/python2.7/sitepackages/sklearn/model_selection/_validation.py”, line 140,
in cross_val_score for train, test in cv_iter)
File “/Users/nelsondsouza/anaconda/lib/python2.7/sitepackages/sklearn/externals/joblib/parallel.py”, line 758,
in __call__ while self.dispatch_one_batch(iterator):
File “/Users/nelsondsouza/anaconda/lib/python2.7/sitepackages/sklearn/externals/joblib/parallel.py”, line 608,
in dispatch_one_batch self._dispatch(tasks)
File “/Users/nelsondsouza/anaconda/lib/python2.7/sitepackages/sklearn/externals/joblib/parallel.py”, line 571,
in _dispatch job = self._backend.apply_async(batch, callback=cb)
File “/Users/nelsondsouza/anaconda/lib/python2.7/sitepackages/sklearn/externals/joblib/_parallel_backends.py”, line 109,
in apply_async result = ImmediateResult(func)
File “/Users/nelsondsouza/anaconda/lib/python2.7/sitepackages/sklearn/externals/joblib/_parallel_backends.py”, line 326,
in __init__ self.results = batch()
File “/Users/nelsondsouza/anaconda/lib/python2.7/sitepackages/sklearn/externals/joblib/parallel.py”, line 131,
in __call__ return [func(*args, **kwargs) for func, args, kwargs in self.items]
File “/Users/nelsondsouza/anaconda/lib/python2.7/sitepackages/sklearn/model_selection/_validation.py”, line 238,
in _fit_and_score estimator.fit(X_train, y_train, **fit_params)
File “/Users/nelsondsouza/anaconda/lib/python2.7/sitepackages/sklearn/discriminant_analysis.py”, line 468,
in fit self._solve_svd(X, y)
File “/Users/nelsondsouza/anaconda/lib/python2.7/sitepackages/sklearn/discriminant_analysis.py”, line 378,
in _solve_svd fac = 1. / (n_samples - n_classes)
3. ZeroDivisionError: float division by zero(浮点数除以零)