我正在尝试使用隐藏层中的激活sigmoid和输出层中的softmax实现多类分类。我之前在输出层使用sigmoid实现了二进制分类,但是当我更改我的代码以执行多类分类时,它不能正常工作,并且在训练期间不会减少损失。 我的代码中有任何错误吗?或者我是否错误地实施了它?
这是我使用的架构: 输入层 - > 3具有乙状结肠的神经元 - > 3具有乙状结肠的神经元 - > SOFTMAX。
到目前为止,这是我的代码:
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, log_loss
import matplotlib.pyplot as plt
import load_data as ld
X_train, X_test, Y_train, Y_test = ld.load_dataset()
input_dim = 4
hidden_dim = 20
output_dim = 3
num_epoch = 10
learning_rate = 0.01
learning_curve = []
model = {}
def init():
np.random.seed(1)
model['W1'] = np.random.randn(input_dim,hidden_dim) / np.sqrt(hidden_dim)
model['W2'] = np.random.randn(hidden_dim, hidden_dim) / np.sqrt(hidden_dim)
model['W3'] = np.random.randn(hidden_dim, output_dim) / np.sqrt(hidden_dim)
model['b1'] = np.random.randn(1,hidden_dim)
model['b2'] = np.random.randn(1,hidden_dim)
model['b3'] = np.random.randn(1,output_dim)
def sigmoid(z):
return 1 / (1 + np.exp(-z))
def softmax(z):
z = z - np.max(z)
exp_scores = np.exp(z)
return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
def softmax_derivative(z , y):
return np.ones(z.shape)
def sigmoid_derivative(z):
return z * (1 - z)
def calculate_error(y, y_pred):
temp = []
for i in range(y.shape[0]):
temp.append(log_loss(y[i,:], y_pred[i,:], normalize=True))
temp = np.array(temp, dtype=np.float32)
temp = temp.reshape([105,1])
return temp
def accuracy(y_actual, y_pred):
for i in range(len(y_actual)):
if (y_pred[i] > 0.5):
y_pred[i] = 1
else:
y_pred[i] = 0
cm = confusion_matrix(y_actual, y_pred)
return (cm[0][0] + cm[1][1])/ len(y_actual)
return y_pred
def forward_propagation(X_test):
W1, W2, W3, b1, b2, b3 = model['W1'], model['W2'], model['W3'], model['b1'], model['b2'], model['b3']
z1 = X_test.dot(W1) + b1
a1 = sigmoid(z1)
z2 = a1.dot(W2) + b2
a2 = sigmoid(z2)
z3 = a2.dot(W3) + b3
a3 = softmax(z3)
return a3
def backward_propagation(X_train, Y_train):
for i in range(num_epoch):
W1, W2, W3, b1, b2, b3 = model['W1'], model['W2'], model['W3'], model['b1'], model['b2'], model['b3']
z1 = X_train.dot(W1) + b1
a1 = sigmoid(z1)
z2 = a1.dot(W2) + b2
a2 = sigmoid(z2)
z3 = a2.dot(W3) + b3
a3 = softmax(z3)
error_a3 = calculate_error(Y_train, a3)
slope_a3 = softmax_derivative(a3, Y_train)
delta_a3 = error_a3 * slope_a3
error_a2 = delta_a3.dot(W3.T)
slope_a2 = sigmoid_derivative(a2)
delta_a2 = error_a2 * slope_a2
error_a1 = delta_a2.dot(W2.T)
slope_a1 = sigmoid_derivative(a1)
delta_a1 = error_a1 * slope_a1
model['W1'] = W1 + learning_rate * X_train.T.dot(delta_a1)
model['W2'] = W2 + learning_rate * (a1.T.dot(delta_a2))
model['W3'] = W3 + learning_rate * (a2.T.dot(delta_a3))
model['b1'] = b1 + np.mean(delta_a1, axis=0, keepdims=True)
model['b2'] = b2 + np.mean(delta_a2, axis=0, keepdims=True)
model['b3'] = b3 + np.mean(delta_a3, axis=0, keepdims=True)
learning_curve.append(np.mean(error_a3))
if(i%1 == 0):
print ("epoch : ",i,", error : ",np.mean(error_a3))
init()
before_backprop = forward_propagation(X_test)
backward_propagation(X_train, Y_train)
after_backprop = forward_propagation(X_test)
learning_curve = np.array(learning_curve, dtype=np.float32)
plt.plot(learning_curve)
plt.title('Learning Curve')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()