import pandas as pd
import numpy as np
from sklearn import model_selection
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from sklearn.model_selection import learning_curve
# Seed NumPy's global RNG first so the shuffle performed later is reproducible.
np.random.seed(55)

# Load the training set from the CSV file in the working directory.
data = pd.read_csv("training5.csv")

# Feature matrix built from columns x1 and x2; label vector from column y.
X = np.array(data[['x1', 'x2']])
y = np.array(data['y'])

# Model whose learning curve will be drawn.
estimator = LogisticRegression()
def randomize(X, Y):
    """Shuffle X and Y in unison along their first axis.

    A single random permutation of the row indices is drawn from NumPy's
    global RNG and applied to both arrays, so row ``i`` of the returned X
    still corresponds to element ``i`` of the returned Y.

    Args:
        X: NumPy array of samples, indexed by sample along axis 0.
        Y: NumPy array of labels with one entry per sample; its length
            determines the size of the permutation.

    Returns:
        A tuple ``(X2, Y2)`` of new, shuffled arrays. The inputs are not
        modified.
    """
    permutation = np.random.permutation(Y.shape[0])
    # Index only the first axis so X may be 1-D, 2-D or higher; for 2-D
    # input this is identical to X[permutation, :].
    X2 = X[permutation]
    Y2 = Y[permutation]
    return X2, Y2
# Shuffle features and labels with one shared permutation so each row of X2
# keeps its matching label in y2.
X2, y2 = randomize(X, y)
def draw_learning_curves(X, y, estimator, num_trainings):
    """Plot mean training and cross-validation scores versus training-set size.

    Args:
        X: Feature matrix handed to scikit-learn's ``learning_curve``.
        y: Target values aligned with the rows of ``X``.
        estimator: scikit-learn estimator to evaluate.
        num_trainings: Number of training-set sizes to evaluate, spaced
            evenly between 10% and 100% of the available training data.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    # Use the X and y arguments (not the module-level X2/y2) so the function
    # evaluates whatever data the caller passes in.
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=None, n_jobs=1,
        train_sizes=np.linspace(.1, 1.0, num_trainings))

    # Average over axis 1 to get a single score per training-set size.
    train_scores_mean = np.mean(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)

    plt.grid()
    plt.title("Learning Curves")
    plt.xlabel("Training examples")
    plt.ylabel("Score")

    # Plot against the actual training-set sizes so the x-axis matches its
    # "Training examples" label instead of showing the positions 0..n-1.
    plt.plot(train_sizes, train_scores_mean, 'o-', color="g",
             label="Training score")
    plt.plot(train_sizes, test_scores_mean, 'o-', color="y",
             label="Cross-validation score")

    plt.legend(loc="best")
    plt.show()
数据:http://www.mediafire.com/file/mik8ufsxp91auy4/training5.csv/file 我对 ML 和 Python 还不熟悉,有人能解释一下从 def randomize(X, Y) 到代码末尾每个部分的工作原理吗?先谢谢了(TIA)<3
答案 0 :(得分:0)
这是个很好的问题,但遗憾的是,在 Stack Overflow 的回答里逐行讲解每一行代码并不现实。我建议您去 DataCamp 看看,那里有很好的课程,涵盖了您在这里展示的所有内容。
一个简单直接的解决办法是:在定义函数之后调用它。在当前代码块的末尾添加下面这一行即可:
# Draw the curves for the shuffled data, evaluating 5 training-set sizes.
draw_learning_curves(X2, y2, estimator, 5)
这会以 X2、y2 和 estimator 变量作为参数,执行您定义的函数。将最后一个参数改为您想要可视化的训练集大小的个数即可。