I am following this video tutorial series by Dan Shiffman about building a small "toy" neural network library.
Earlier in the series, the tutorial uses JS and a matrix library that he shows how to code. I am using numpy instead.
In this video he programs gradient descent and backpropagation, but since I am using numpy my code does not seem to work. I would really appreciate it if someone could help me!
Here is my code:
import numpy as np
import math

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def dsigmoid(x):
    return x * (1 - x)

class NeuralNetwork:
    def __init__(self, Inum, Hnum, Onum):
        self.Inum = Inum
        self.Hnum = Hnum
        self.Onum = Onum
        self.lr = 0.1

        self.weightsIH = np.random.rand(self.Hnum, self.Inum) * 2 - 1
        self.weightsHO = np.random.rand(self.Onum, self.Hnum) * 2 - 1

        self.biasH = np.random.rand(self.Hnum) * 2 - 1
        self.biasO = np.random.rand(self.Onum) * 2 - 1

    def feedForward(self, inputs):
        hidden = np.dot(self.weightsIH, np.array(inputs))
        hidden = hidden + self.biasH
        hidden = sigmoid(hidden)

        outputs = np.dot(self.weightsHO, hidden)
        outputs = outputs + self.biasO
        outputs = sigmoid(outputs)

        return outputs

    def train(self, inputs, targets):
        # Feed Forward
        hidden = np.dot(self.weightsIH, np.array(inputs))
        hidden = hidden + self.biasH
        hidden = sigmoid(hidden)

        outputs = np.dot(self.weightsHO, hidden)
        outputs = outputs + self.biasO
        outputs = sigmoid(outputs)

        # Calculate errors
        errorsO = np.array(targets) - outputs

        # Calculate gradients with derivative of sigmoid
        # TODO: Use numpy for gradient calculation (if possible)
        gradients = dsigmoid(outputs)
        gradients = gradients * errorsO
        gradients = gradients * self.lr

        # Calculate deltas
        hiddenT = hidden[np.newaxis]
        weightsHODeltas = np.dot(gradients, hiddenT)

        # Adjust weights by deltas
        self.weightsHO = self.weightsHO + weightsHODeltas

        # Adjust bias by gradients
        self.biasO = self.biasO + gradients

        errorsH = np.transpose(self.weightsHO) * errorsO

        # Calculate gradients with derivative of sigmoid
        # TODO: Use numpy for gradient calculation (if possible)
        gradientsH = dsigmoid(hidden)
        gradientsH = gradientsH * errorsH
        gradientsH = gradientsH * self.lr

        # Calculate deltas
        inputsT = np.array(inputs)[np.newaxis]
        weightsIHDeltas = np.dot(gradientsH, inputsT)

        # Adjust weights by deltas
        self.weightsIH = self.weightsIH + weightsIHDeltas

        # Adjust bias by gradients
        self.biasO = self.biasO + gradientsH
And here is the code I am running it with:
from NN import NeuralNetwork
from random import shuffle

def main():
    nn = NeuralNetwork(2, 2, 1)

    dataset = [
        {
            "inputs": [0, 0],
            "outputs": 0
        },
        {
            "inputs": [0, 1],
            "outputs": 1
        },
        {
            "inputs": [1, 0],
            "outputs": 1
        },
        {
            "inputs": [1, 1],
            "outputs": 0
        }
    ]

    for x in range(100):
        for data in dataset:
            print(data)
            nn.train(data["inputs"], data["outputs"])
        shuffle(dataset)

    for data in dataset:
        print(data)
        nn.feedForward(data["inputs"])

if __name__ == '__main__':
    main()
And this is the error message I get:
Traceback (most recent call last):
  File "c:\Users\ghost\Desktop\Notes\Programming\Machine Learning\NN From Scratch\Yet Another Neural Network Library\main.py", line 38, in <module>
    main()
  File "c:\Users\ghost\Desktop\Notes\Programming\Machine Learning\NN From Scratch\Yet Another Neural Network Library\main.py", line 30, in main
    nn.train(data["inputs"], data["outputs"])
  File "c:\Users\ghost\Desktop\Notes\Programming\Machine Learning\NN From Scratch\Yet Another Neural Network Library\NN.py", line 77, in train
    weightsIHDeltas = np.dot(gradientsH, inputsT)
ValueError: shapes (2,2) and (1,2) not aligned: 2 (dim 1) != 1 (dim 0)
Answer 0 (score: 1)
The problem is that you are getting confused about the dimensions of your numpy arrays. When writing ML code with numpy, it is easier to work with column vectors, because that is what you do when deriving the equations on paper. There is also a logical error in your code. A short sketch of the 1-D vs. column-vector distinction is shown below, followed by the corrected code.
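As a minimal illustration of the shape rules (this snippet is only a sketch and not part of the network code; the values and the (1, 2) weight shape are made up), note how np.newaxis turns a plain 1-D array into the column vector the maths assumes:

import numpy as np

# Illustrative only: a 1-D array has shape (n,), the column vector you write
# on paper has shape (n, 1). np.newaxis converts between the two.
v = np.array([0.3, 0.7])       # shape (2,)  -- plain 1-D array
col = v[:, np.newaxis]         # shape (2, 1) -- column vector, as on paper
row = v[np.newaxis]            # shape (1, 2) -- row vector

W = np.random.rand(1, 2)       # e.g. an (Onum, Hnum) weight matrix with Onum=1, Hnum=2
print(np.dot(W, col).shape)    # (1, 1): the inner dimensions (2 and 2) match
# np.dot(W, row) raises the same "shapes not aligned" ValueError you saw,
# because a (1, 2) matrix cannot be multiplied by another (1, 2) matrix.

With the shapes kept explicit like this, here is the corrected code: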
import numpy as np
import math
from random import shuffle

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def dsigmoid(x):
    return x * (1 - x)

class NeuralNetwork:
    def __init__(self, Inum, Hnum, Onum):
        self.Inum = Inum
        self.Hnum = Hnum
        self.Onum = Onum
        self.lr = 0.1

        self.weightsIH = np.random.rand(self.Hnum, self.Inum) * 2 - 1
        self.weightsHO = np.random.rand(self.Onum, self.Hnum) * 2 - 1

        self.biasH = np.random.rand(self.Hnum) * 2 - 1
        self.biasO = np.random.rand(self.Onum) * 2 - 1

    def feedForward(self, inputs):
        hidden = np.dot(self.weightsIH, np.array(inputs))
        hidden = hidden + self.biasH
        hidden = sigmoid(hidden)

        outputs = np.dot(self.weightsHO, hidden)
        outputs = outputs + self.biasO
        outputs = sigmoid(outputs)

        return outputs

    def train(self, inputs, targets):
        """
        NOTE: Always deal with column vectors as you do in maths.
        """
        # Feed Forward
        hidden = np.dot(self.weightsIH, np.array(inputs))
        hidden = hidden + self.biasH
        hidden = sigmoid(hidden)

        outputs = np.dot(self.weightsHO, hidden)
        outputs = outputs + self.biasO
        outputs = sigmoid(outputs)

        # Calculate errors
        errorsO = np.array(targets) - outputs
        errorsO = errorsO[:, np.newaxis]  # errorsO is a column now

        # Calculate gradients with derivative of sigmoid
        gradientsO_ = dsigmoid(outputs)
        # Convert gradientsO also to a column vector before taking the product
        gradientsO_ = gradientsO_[:, np.newaxis] * errorsO  # Hadamard product to get a new column vector
        gradientsO = gradientsO_ * self.lr

        # Calculate deltas
        hiddenT = hidden[:, np.newaxis]  # hidden is a column now
        weightsHODeltas = np.dot(hiddenT, gradientsO.T)

        # Adjust weights by deltas
        self.weightsHO = self.weightsHO + weightsHODeltas.reshape(self.weightsHO.shape)

        # Adjust bias by gradients
        self.biasO = self.biasO + gradientsO.reshape(self.biasO.shape)

        # Hidden layer
        errorsH = np.dot(np.transpose(self.weightsHO), gradientsO_)  # You had a conceptual mistake here. You don't incorporate the learning rate here

        # Calculate gradients with derivative of sigmoid
        gradientsH = dsigmoid(hidden)
        gradientsH = gradientsH[:, np.newaxis] * errorsH
        gradientsH = gradientsH * self.lr

        # Calculate deltas
        inputsT = np.array(inputs)[:, np.newaxis]
        weightsIHDeltas = np.dot(inputsT, gradientsH.T)

        # Adjust weights by deltas
        self.weightsIH = self.weightsIH + weightsIHDeltas.reshape(self.weightsIH.shape)

        # Adjust bias by gradients
        self.biasH = self.biasH + gradientsH.reshape(self.biasH.shape)
def main():
    nn = NeuralNetwork(2, 2, 1)

    dataset = [
        {
            "inputs": [0, 0],
            "outputs": 0
        },
        {
            "inputs": [0, 1],
            "outputs": 1
        },
        {
            "inputs": [1, 0],
            "outputs": 1
        },
        {
            "inputs": [1, 1],
            "outputs": 0
        }
    ]

    for x in range(100):
        for data in dataset:
            # print(data)
            nn.train(data["inputs"], data["outputs"])
        shuffle(dataset)

    for data in dataset:
        print(data)
        nn.feedForward(data["inputs"])

if __name__ == '__main__':
    main()
P.S.: You could also improve the code quality by not duplicating the feed-forward code inside train(); see the sketch below.
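For example, here is a hypothetical refactoring sketch (the _forward helper name is made up and not part of the corrected code above): the forward pass returns both activations so that train() can reuse it instead of repeating it.

# Hypothetical refactoring: a single forward pass shared by feedForward and train.
def _forward(self, inputs):
    hidden = sigmoid(np.dot(self.weightsIH, np.array(inputs)) + self.biasH)
    outputs = sigmoid(np.dot(self.weightsHO, hidden) + self.biasO)
    return hidden, outputs

def feedForward(self, inputs):
    _, outputs = self._forward(inputs)
    return outputs

def train(self, inputs, targets):
    hidden, outputs = self._forward(inputs)
    # ... backpropagation continues exactly as in the corrected train() above ...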
Answer 1 (score: 0)
OK, thanks to some help from Zoma_alchemist and the toy neural network GitHub, I have managed to fix the code so that it now works well!
Here is the NN class:
import numpy as np
import math

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def dsigmoid(x):
    return x * (1 - x)

class NeuralNetwork:
    def __init__(self, Inum, Hnum, Onum):
        self.Inum = Inum
        self.Hnum = Hnum
        self.Onum = Onum
        self.lr = 0.1

        self.weightsIH = np.random.rand(self.Hnum, self.Inum) * 2 - 1
        self.weightsHO = np.random.rand(self.Onum, self.Hnum) * 2 - 1

        self.biasH = np.random.rand(self.Hnum) * 2 - 1
        self.biasO = np.random.rand(self.Onum) * 2 - 1

    def feedForward(self, inputs):
        hidden = np.dot(self.weightsIH, np.array(inputs))
        hidden = hidden + self.biasH
        hidden = sigmoid(hidden)

        outputs = np.dot(self.weightsHO, hidden)
        outputs = outputs + self.biasO
        outputs = sigmoid(outputs)

        return outputs

    def train(self, inputs, targets):
        """
        NOTE: Always deal with column vectors as you do in maths.
        """
        # Feed Forward
        hidden = np.dot(self.weightsIH, np.array(inputs))
        hidden = hidden + self.biasH
        hidden = sigmoid(hidden)

        outputs = np.dot(self.weightsHO, hidden)
        outputs = outputs + self.biasO
        outputs = sigmoid(outputs)

        # Calculate errors
        errorsO = np.array(targets) - outputs
        errorsO = errorsO[:, np.newaxis]  # errorsO is a column now

        # Calculate gradients with derivative of sigmoid
        gradientsO_ = dsigmoid(outputs)
        # Convert gradientsO also to a column vector before taking the product
        gradientsO_ = gradientsO_[:, np.newaxis] * errorsO  # Hadamard product to get a new column vector
        gradientsO = gradientsO_ * self.lr

        # Calculate deltas
        hiddenT = hidden[np.newaxis]  # hidden as a row vector here
        weightsHODeltas = np.dot(gradientsO, hiddenT)

        # Adjust weights by deltas
        self.weightsHO = self.weightsHO + weightsHODeltas

        # Adjust bias by gradients
        self.biasO = self.biasO + gradientsO.reshape(self.biasO.shape)

        # Hidden layer (the learning rate is not incorporated in the error term)
        errorsH = np.dot(np.transpose(self.weightsHO), errorsO)

        # Calculate gradients with derivative of sigmoid
        gradientsH = dsigmoid(hidden)
        gradientsH = gradientsH[:, np.newaxis] * errorsH
        gradientsH = gradientsH * self.lr

        # Calculate deltas
        inputsT = np.array(inputs)[np.newaxis]
        weightsIHDeltas = np.dot(gradientsH, inputsT)

        # Adjust weights by deltas
        self.weightsIH = self.weightsIH + weightsIHDeltas

        # Adjust bias by gradients
        self.biasH = self.biasH + gradientsH.reshape(self.biasH.shape)
And here is the XOR code I run it on:
from NN import NeuralNetwork
from random import shuffle, choice

def main():
    nn = NeuralNetwork(2, 2, 1)

    dataset = [
        {
            "inputs": [0, 0],
            "outputs": 0
        },
        {
            "inputs": [0, 1],
            "outputs": 1
        },
        {
            "inputs": [1, 0],
            "outputs": 1
        },
        {
            "inputs": [1, 1],
            "outputs": 0
        }
    ]

    for x in range(10000):
        for data in dataset:
            nn.train(data["inputs"], data["outputs"])
        shuffle(dataset)

    print(nn.feedForward([0, 0]))
    print(nn.feedForward([0, 1]))
    print(nn.feedForward([1, 0]))
    print(nn.feedForward([1, 1]))

if __name__ == '__main__':
    main()
Here are the results:
[ 0.04557893]
[ 0.96304187]
[ 0.96304197]
[ 0.03685831]