Question

我正在尝试使用隐藏层中的激活sigmoid和输出层中的softmax实现多类分类。我之前在输出层使用sigmoid实现了二进制分类，但是当我更改我的代码以执行多类分类时，它不能正常工作，并且在训练期间不会减少损失。我的代码中有任何错误吗？或者我是否错误地实施了它？

这是我使用的架构：输入层 - ＆gt; 3具有乙状结肠的神经元 - > 3具有乙状结肠的神经元 - > SOFTMAX。

到目前为止，这是我的代码：

# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, log_loss
import matplotlib.pyplot as plt
import load_data as ld

X_train, X_test, Y_train, Y_test = ld.load_dataset()

input_dim = 4
hidden_dim = 20
output_dim = 3
num_epoch = 10
learning_rate = 0.01
learning_curve = []
model = {}

def init():
    np.random.seed(1)
    model['W1'] = np.random.randn(input_dim,hidden_dim) / np.sqrt(hidden_dim)
    model['W2'] = np.random.randn(hidden_dim, hidden_dim) / np.sqrt(hidden_dim)
    model['W3'] = np.random.randn(hidden_dim, output_dim) / np.sqrt(hidden_dim)
    model['b1'] = np.random.randn(1,hidden_dim)
    model['b2'] = np.random.randn(1,hidden_dim)
    model['b3'] = np.random.randn(1,output_dim)

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def softmax(z):
    z = z - np.max(z)
    exp_scores = np.exp(z)
    return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

def softmax_derivative(z , y):
    return np.ones(z.shape)

def sigmoid_derivative(z):
    return z * (1 - z)

def calculate_error(y, y_pred):
    temp = []
    for i in range(y.shape[0]):
        temp.append(log_loss(y[i,:], y_pred[i,:], normalize=True))
    temp = np.array(temp, dtype=np.float32)
    temp = temp.reshape([105,1])
    return temp

def accuracy(y_actual, y_pred):
    for i in range(len(y_actual)):
        if (y_pred[i] > 0.5):
            y_pred[i] = 1
        else:
            y_pred[i] = 0
    cm = confusion_matrix(y_actual, y_pred)
    return (cm[0][0] + cm[1][1])/ len(y_actual)
    return y_pred


def forward_propagation(X_test):
    W1, W2, W3, b1, b2, b3 = model['W1'], model['W2'], model['W3'], model['b1'], model['b2'], model['b3']
    z1 = X_test.dot(W1) + b1
    a1 = sigmoid(z1)
    z2 = a1.dot(W2) + b2
    a2 = sigmoid(z2)
    z3 = a2.dot(W3) + b3
    a3 = softmax(z3)
    return a3

def backward_propagation(X_train, Y_train):
    for i in range(num_epoch):
        W1, W2, W3, b1, b2, b3 = model['W1'], model['W2'], model['W3'], model['b1'], model['b2'], model['b3']
        z1 = X_train.dot(W1) + b1
        a1 = sigmoid(z1)
        z2 = a1.dot(W2) + b2
        a2 = sigmoid(z2)
        z3 = a2.dot(W3) + b3
        a3 = softmax(z3)

        error_a3 = calculate_error(Y_train, a3)
        slope_a3 = softmax_derivative(a3, Y_train)
        delta_a3 = error_a3 * slope_a3

        error_a2 = delta_a3.dot(W3.T)
        slope_a2 = sigmoid_derivative(a2)
        delta_a2 = error_a2 * slope_a2

        error_a1 = delta_a2.dot(W2.T)
        slope_a1 = sigmoid_derivative(a1)
        delta_a1 = error_a1 * slope_a1

        model['W1'] = W1 + learning_rate * X_train.T.dot(delta_a1)
        model['W2'] = W2 + learning_rate * (a1.T.dot(delta_a2))
        model['W3'] = W3 + learning_rate * (a2.T.dot(delta_a3))
        model['b1'] = b1 + np.mean(delta_a1, axis=0, keepdims=True)
        model['b2'] = b2 + np.mean(delta_a2, axis=0, keepdims=True)
        model['b3'] = b3 + np.mean(delta_a3, axis=0, keepdims=True)
        learning_curve.append(np.mean(error_a3))
        if(i%1 == 0):
            print ("epoch : ",i,", error : ",np.mean(error_a3))

init()
before_backprop = forward_propagation(X_test)
backward_propagation(X_train, Y_train)
after_backprop = forward_propagation(X_test)


learning_curve = np.array(learning_curve, dtype=np.float32)
plt.plot(learning_curve)
plt.title('Learning Curve')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

scracth神经网络的多类分类

0 个答案: