I have implemented a simple multi-layer perceptron (with just one hidden layer) that can learn regression problems. I wrote it so that the choice between sigmoid, tanh and relu activations can be specified, and then implemented squared error as the loss function.

I now want to allow the option of using the same model to learn multi-class classification problems, so I would like to add the option of a softmax activation together with a cross-entropy loss. In my code below, the only changes that (I hope) need to be made are in the activation() and loss() functions; everything else should then work out of the box in the forward and backward passes. The code runs a simulation of my model learning the XOR function, where the chosen activation function should be uncommented at the top. However, I am genuinely at a loss implementing these two functions, and even more so their derivatives. Any help and guidance is appreciated.
import sys
import numpy as np
activation = 'sigmoid'
# activation = 'tanh'
# activation = 'relu'
# activation = 'softmax'
numEpochs = 10000
class DataSet:
    def __init__(self, data, trainSplit=1):
        self.size = len(data)
        self.trainSize = int(self.size * trainSplit)
        self.testSize = self.size - self.trainSize
        self.inputs, self.labels = [], []
        for i in range(len(data)):
            self.inputs.append(data[i][0])
            self.labels.append(data[i][1])
        self.trainInputs = self.inputs[:self.trainSize]
        self.trainLabels = self.labels[:self.trainSize]
        self.testInputs = self.inputs[self.trainSize:]
        self.testLabels = self.labels[self.trainSize:]
        try:
            self.numInputs = len(self.inputs[0])
        except TypeError:
            self.numInputs = 1
        try:
            self.numOutputs = len(self.labels[0])
        except TypeError:
            self.numOutputs = 1
class MLP:
    def __init__(self, numInputs, numHidden, numOutputs, activationFunction):
        # MLP architecture sizes
        self.numInputs = numInputs
        self.numHidden = numHidden
        self.numOutputs = numOutputs
        self.activationFunction = activationFunction.lower()

        # MLP weights
        self.IH_weights = np.random.rand(numInputs, numHidden)   # Input -> Hidden
        self.HO_weights = np.random.rand(numHidden, numOutputs)  # Hidden -> Output

        # MLP biases
        self.IH_bias = np.zeros((1, numHidden))
        self.HO_bias = np.zeros((1, numOutputs))

        # Gradients corresponding to weight matrices computed during backprop
        self.IH_w_gradients = np.zeros_like(self.IH_weights)
        self.HO_w_gradients = np.zeros_like(self.HO_weights)

        # Gradients corresponding to biases computed during backprop
        self.IH_b_gradients = np.zeros_like(self.IH_bias)
        self.HO_b_gradients = np.zeros_like(self.HO_bias)

        # Input, hidden and output layer neuron values
        self.I = np.zeros(numInputs)   # Inputs
        self.L = np.zeros(numOutputs)  # Labels
        self.H = np.zeros(numHidden)   # Hidden
        self.O = np.zeros(numOutputs)  # Output
    def activation(self, x, derivative=False):
        if self.activationFunction == 'sigmoid':
            if derivative:
                return x * (1 - x)
            return 1 / (1 + np.exp(-x))
        if self.activationFunction == 'tanh':
            if derivative:
                # As with sigmoid above, x here is the already-activated output,
                # so the derivative is expressed in terms of tanh's output.
                return 1 - x ** 2
            return np.tanh(x)
        if self.activationFunction == 'relu':
            if derivative:
                return (x > 0).astype(float)
            return np.maximum(0, x)
        # TO DO ################################################################
        if self.activationFunction == 'softmax':
            if derivative:
                return 0
            return 0
        print("ERROR: Activation function not found.")
        sys.exit()
    def loss(self, labels, predictions, derivative=False):
        # TO DO ################################################################
        # Cross-Entropy
        if self.activationFunction == 'softmax':
            if derivative:
                return 0
            return 0
        # Squared Error
        else:
            if derivative:
                return (-2 * labels) + (2 * predictions)
            return (labels - predictions) ** 2
    def forward(self, inputs):
        # Ensure that inputs is a list
        try:
            len(inputs)
        except TypeError:
            inputs = [inputs]
        self.I = np.array(inputs).reshape(1, self.numInputs)
        self.H = self.I.dot(self.IH_weights) + self.IH_bias
        self.H = self.activation(self.H)
        self.O = self.H.dot(self.HO_weights) + self.HO_bias
        self.O = self.activation(self.O)
    def backwards(self, labels):
        # Ensure that labels is a list
        try:
            len(labels)
        except TypeError:
            labels = [labels]
        self.L = np.array(labels)
        # Note: loss() is called here with arguments in (predictions, labels) order,
        # opposite to its (labels, predictions) signature; for the squared-error
        # branch this yields the negative gradient, which is consistent with the
        # additive update in updateWeights().
        self.O_error = self.loss(self.O, self.L)
        self.O_delta = self.loss(self.O, self.L, derivative=True) * self.activation(self.O, derivative=True)
        self.H_error = self.O_delta.dot(self.HO_weights.T)
        self.H_delta = self.H_error * self.activation(self.H, derivative=True)
        self.IH_w_gradients += self.I.T.dot(self.H_delta)
        self.HO_w_gradients += self.H.T.dot(self.O_delta)
        self.IH_b_gradients += self.H_delta
        self.HO_b_gradients += self.O_delta
        return self.O_error
    def updateWeights(self, learningRate):
        self.IH_weights += learningRate * self.IH_w_gradients
        self.HO_weights += learningRate * self.HO_w_gradients
        self.IH_bias += learningRate * self.IH_b_gradients
        self.HO_bias += learningRate * self.HO_b_gradients
        self.IH_w_gradients = np.zeros_like(self.IH_weights)
        self.HO_w_gradients = np.zeros_like(self.HO_weights)
        self.IH_b_gradients = np.zeros_like(self.IH_bias)
        self.HO_b_gradients = np.zeros_like(self.HO_bias)
    def process(self, data, train=False, learningRate=0):
        if train:
            size = data.trainSize
            inputs = data.trainInputs
            labels = data.trainLabels
        else:
            size = data.testSize
            inputs = data.testInputs
            labels = data.testLabels
        errors = []
        for i in range(size):
            self.forward(inputs[i])
            errors.append(self.backwards(labels[i]))
        if train:
            self.updateWeights(learningRate)
        return np.mean(errors)
data1 = DataSet([
    [[0, 0], 0],
    [[0, 1], 1],
    [[1, 0], 1],
    [[1, 1], 0]
])

data2 = DataSet([
    [[0, 0], -1],
    [[0, 1], 1],
    [[1, 0], 1],
    [[1, 1], -1]
])

data3 = DataSet([
    [[0, 0], [1, 0]],
    [[0, 1], [0, 1]],
    [[1, 0], [0, 1]],
    [[1, 1], [1, 0]]
])
if activation == 'sigmoid':
    data = data1
    mlp = MLP(data.numInputs, 2, data.numOutputs, 'sigmoid')
    learningRate = 1
if activation == 'tanh':
    data = data2
    mlp = MLP(data.numInputs, 2, data.numOutputs, 'tanh')
    learningRate = 0.1
if activation == 'relu':
    data = data1
    mlp = MLP(data.numInputs, 2, data.numOutputs, 'relu')
    learningRate = 0.001
if activation == 'softmax':
    data = data3
    mlp = MLP(data.numInputs, 2, data.numOutputs, 'softmax')
    learningRate = 0.01
################################################################################
# TO DO: UPDATE WEIGHTS AT INTERVALS, NOT EVERY EPOCH
################################################################################
losses = []
for epoch in range(numEpochs):
    epochLoss = mlp.process(data, train=True, learningRate=learningRate)
    losses.append(epochLoss)
    if epoch % 1000 == 0 or epoch == numEpochs - 1:
        print("EPOCH:", epoch)
        print("LOSS: ", epochLoss, "\n")
Answer 0 (score: 0):
Unfortunately, softmax is not as easy as the other activation functions you have posted. For the activation you have to compute exp(y_i) for each output and then divide by the sum of exp(y_k) over every y_k in Y. For the derivative you have to compute every combination (n^2 combinations) of partial derivatives of every output with respect to every input of the layer, i.e. a full Jacobian matrix. Fortunately, the loss is a little easier to understand: since you can think of softmax as giving you probabilities (it behaves like a probability distribution), you compute the cross-entropy between the returned values and the target values.
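To make that concrete, here is a minimal sketch of what those two TO DO branches could compute, written as standalone helper functions rather than as edits to your activation()/loss() methods (the helper names are just for illustration). It assumes labels and predictions are row vectors of shape (1, numOutputs), as produced by your forward() and backwards() code.

import numpy as np

def softmax(x):
    # Subtract the row-wise max for numerical stability; softmax is
    # unchanged by adding a constant to every input.
    e = np.exp(x - np.max(x, axis=-1, keepdims=True))
    return e / np.sum(e, axis=-1, keepdims=True)

def softmax_jacobian(s):
    # s is the softmax output (a row vector). The derivative is a full
    # Jacobian matrix: J[i, j] = s_i * (delta_ij - s_j).
    s = s.reshape(-1, 1)
    return np.diagflat(s) - s.dot(s.T)

def cross_entropy(labels, predictions, eps=1e-12):
    # Negative log-likelihood of the true classes; eps avoids log(0).
    predictions = np.clip(predictions, eps, 1.0)
    return -np.sum(labels * np.log(predictions))

def cross_entropy_derivative(labels, predictions, eps=1e-12):
    # Elementwise dL/dp for each predicted probability p.
    predictions = np.clip(predictions, eps, 1.0)
    return -labels / predictions

Two caveats about wiring this into your backwards(). First, the softmax derivative is a matrix rather than an elementwise vector, so the line that multiplies the loss derivative by activation(self.O, derivative=True) only works for elementwise activations; for softmax you would either do the matrix product with the Jacobian or use the standard shortcut that the combined softmax-plus-cross-entropy gradient with respect to the pre-softmax values simplifies to predictions - labels. Second, your updateWeights() adds the accumulated gradients, and the existing squared-error branch compensates by producing the negative gradient, so in your sign convention the shortcut delta would be labels - predictions (i.e. self.L - self.O).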