我是python和机器学习的新手。我从头开始编码逻辑回归,我想添加交叉验证。在进行列车测试拆分后,我能以某种方式将训练数据分成5个分区,然后在该分区的每个部分运行我的算法吗?尝试添加sklearn的交叉验证并没有从头开始为我工作。任何正确方向的帮助都将受到赞赏。
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=0)
def Sigmoid(z):
return 1/(1 + np.exp(-z))
def Hypothesis(theta, X):
return Sigmoid(X @ theta)
def Cost_Function(X,Y,theta,m):
hi = Hypothesis(theta, X)
_y = Y.reshape(-1, 1)
J = 1/float(m) * np.sum(-_y * np.log(hi) - (1-_y) * np.log(1-hi))
return J
def Cost_Function_Derivative(X,Y,theta,m,alpha):
hi = Hypothesis(theta,X)
_y = Y.reshape(-1, 1)
J = alpha/float(m) * X.T @ (hi - _y)
return J
def Gradient_Descent(X,Y,theta,m,alpha):
new_theta = theta - Cost_Function_Derivative(X,Y,theta,m,alpha)
return new_theta
def Accuracy(theta):
correct = 0
length = len(X_test)
prediction = (Hypothesis(theta, X_test) > 0.5)
_y = Y_test.reshape(-1, 1)
correct = prediction == _y
my_accuracy = (np.sum(correct) / length)*100
print ('LR Accuracy: ', my_accuracy, "%")
def Logistic_Regression(X,Y,alpha,theta,num_iters):
m = len(Y)
for x in range(num_iters):
new_theta = Gradient_Descent(X,Y,theta,m,alpha)
theta = new_theta
if x % 100 == 0:
print #('theta: ', theta)
print #('cost: ', Cost_Function(X,Y,theta,m))
Accuracy(theta)
ep = .012
initial_theta = np.random.rand(X_train.shape[1],1) * 2 * ep - ep
alpha = 0.5
iterations = 10000
Logistic_Regression(X_train,Y_train,alpha,initial_theta,iterations)