我正在按照这篇指南(https://vincentblog.xyz/posts/neural-networks-from-scratch-in-python)使用“moons”(月亮)数据集。我想再增加一个隐藏层(同样是 4 个神经元),应该如何扩展代码?增加一个隐藏层之后,前向传播和反向传播部分让我特别困惑。以下代码仅适用于一个隐藏层:
def forward_propagation(X, W1, b1, W2, b2):
    """Run the forward pass of a network with one hidden layer.

    The hidden layer is activated with ``relu`` and the output layer with
    ``sigmoid``.

    Args:
        X: Input samples. ``X`` is transposed before the first matrix
            product, so it is presumably shaped (n_samples, n_features) --
            confirm against the caller.
        W1: Weight matrix of the hidden layer.
        b1: Bias of the hidden layer.
        W2: Weight matrix of the output layer.
        b2: Bias of the output layer.

    Returns:
        dict holding the pre-activations ("Z1", "Z2"), the activations
        ("A1", "A2") and the weight matrices used ("W1", "W2"). The weights
        are stored by reference (not copied) so that a backward pass can
        propagate the error through the actual weights.
    """
    Z1 = np.dot(W1, X.T) + b1
    A1 = relu(Z1)
    Z2 = np.dot(W2, A1) + b2
    A2 = sigmoid(Z2)
    return {
        "Z1": Z1,
        "A1": A1,
        "Z2": Z2,
        "A2": A2,
        "W1": W1,
        "W2": W2,
    }
def backward_propagation(forward_params, X, Y, W2=None):
    """Compute the gradients of a one-hidden-layer network.

    The output error is taken as ``A2 - Y``, which is the gradient of a
    sigmoid output combined with binary cross-entropy (assumes that is what
    ``loss_function`` implements -- TODO confirm).

    Args:
        forward_params: dict produced by the forward pass. Reads "A2", "A1"
            and "Z1", and "W2" when present.
        X: Input samples; used as ``np.dot(dZ1, X)``, so presumably shaped
            (n_samples, n_features) -- confirm against the caller.
        Y: Targets; ``Y.shape[1]`` is used as the number of samples.
        W2: Weight matrix of the output layer. When omitted it is looked up
            in ``forward_params["W2"]``.

    Returns:
        dict with the gradients "dZ2", "dW2", "db2", "dZ1", "dW1", "db1".
    """
    import warnings

    A2 = forward_params["A2"]
    A1 = forward_params["A1"]
    Z1 = forward_params["Z1"]
    data_size = Y.shape[1]

    # Output layer.
    dZ2 = A2 - Y
    dW2 = np.dot(dZ2, A1.T) / data_size
    db2 = np.sum(dZ2, axis=1) / data_size

    # The error must flow back through the layer's weights W2, not through
    # the gradient dW2 (they merely happen to share a shape).
    if W2 is None:
        W2 = forward_params.get("W2")
    if W2 is None:
        warnings.warn(
            "backward_propagation: W2 was not supplied; falling back to the "
            "legacy formula that uses dW2, which yields incorrect gradients "
            "for the hidden layer.",
            RuntimeWarning,
            stacklevel=2,
        )
        W2 = dW2

    # Hidden layer.
    dZ1 = np.dot(W2.T, dZ2) * prime_relu(Z1)
    dW1 = np.dot(dZ1, X) / data_size
    # keepdims keeps db1 as a column vector so it matches the shape of b1.
    db1 = np.sum(dZ1, axis=1, keepdims=True) / data_size

    return {
        "dZ2": dZ2,
        "dW2": dW2,
        "db2": db2,
        "dZ1": dZ1,
        "dW1": dW1,
        "db1": db1,
    }
还需要修改主函数:
def one_hidden_layer_model(X, y, epochs=1000, learning_rate=0.003):
    """Train a network with one hidden layer of 4 units by gradient descent.

    Args:
        X: Training samples; ``X.shape[1]`` is used as the input size.
        y: Training targets, passed unchanged to ``loss_function`` and
            ``backward_propagation``.
        epochs: Number of full-batch gradient descent steps.
        learning_rate: Step size of each parameter update.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` with the trained parameters.
    """
    np.random.seed(0)  # Reproducible initialisation.

    # Size the input layer from the data actually passed in, not from a
    # global such as X_train.
    input_size = X.shape[1]
    output_size = 1
    hidden_layer_nodes = 4

    # Weights are scaled by 1/sqrt(fan_in); biases start at zero.
    W1 = np.random.randn(hidden_layer_nodes, input_size) / np.sqrt(input_size)
    b1 = np.zeros((hidden_layer_nodes, 1))
    W2 = np.random.randn(output_size, hidden_layer_nodes) / np.sqrt(hidden_layer_nodes)
    b2 = np.zeros((output_size, 1))

    # Sampled losses; kept for inspection only, not part of the return value.
    loss_history = []

    for i in range(epochs):
        forward_params = forward_propagation(X, W1, b1, W2, b2)
        loss = loss_function(forward_params["A2"], y)
        grads = backward_propagation(forward_params, X, y)

        W1 -= learning_rate * grads["dW1"]
        b1 -= learning_rate * grads["db1"]
        W2 -= learning_rate * grads["dW2"]
        b2 -= learning_rate * grads["db2"]

        # Report the loss every 1000 epochs.
        if i % 1000 == 0:
            loss_history.append(loss)
            print("Costo e iteracion %i: %f" % (i, loss))

    return W1, b1, W2, b2
按照 C. Leconte 的回答修改后,模型的效果非常差。这是相关代码:
def predict(W1, b1, W2, b2, W3, b3, X):
    """Predict binary labels with the trained two-hidden-layer network.

    Args:
        W1, b1: Parameters of the first hidden layer.
        W2, b2: Parameters of the second hidden layer.
        W3, b3: Parameters of the output layer.
        X: Samples to classify, passed unchanged to ``forward_propagation``.

    Returns:
        Float array with a single row, holding 1.0 where the network output
        "A3" is strictly greater than 0.5 and 0.0 elsewhere.
    """
    A3 = forward_propagation(X, W1, b1, W2, b2, W3, b3)["A3"]
    # Vectorized threshold instead of a per-sample Python loop. Only the
    # first output row is used, matching a network with one output unit.
    return (A3[:1] > 0.5).astype(float)
# Classify the training and validation splits with the trained parameters.
train_predictions = predict(W1, b1, W2, b2, W3, b3, X_train)
validation_predictions = predict(W1, b1, W2, b2, W3, b3, X_val)

# Mean absolute difference between predictions and targets; for 0/1 labels
# this is the fraction of misclassified samples (assumes y_* hold 0/1).
train_error = np.mean(np.abs(train_predictions - y_train))
validation_error = np.mean(np.abs(validation_predictions - y_val))

print("train accuracy: {} %".format(100 - train_error * 100))
print("test accuracy: {} %".format(100 - validation_error * 100))
我尝试了不同的学习率,但准确率最多只有 50% 多一点。
答案 0 :(得分:1)
def forward_propagation(X, W1, b1, W2, b2, W3, b3):
    """Run the forward pass of a network with two hidden layers.

    Both hidden layers are activated with ``relu`` and the output layer with
    ``sigmoid``.

    Args:
        X: Input samples. ``X`` is transposed before the first matrix
            product, so it is presumably shaped (n_samples, n_features) --
            confirm against the caller.
        W1, b1: Weights and bias of the first hidden layer.
        W2, b2: Weights and bias of the second hidden layer.
        W3, b3: Weights and bias of the output layer.

    Returns:
        dict holding the pre-activations ("Z1", "Z2", "Z3"), the activations
        ("A1", "A2", "A3") and the weight matrices used ("W1", "W2", "W3").
        The weights are stored by reference (not copied) so that a backward
        pass can propagate the error through the actual weights.
    """
    Z1 = np.dot(W1, X.T) + b1
    A1 = relu(Z1)
    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)
    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)
    return {
        "Z1": Z1,
        "A1": A1,
        "Z2": Z2,
        "A2": A2,
        "Z3": Z3,
        "A3": A3,
        "W1": W1,
        "W2": W2,
        "W3": W3,
    }
def backward_propagation(forward_params, X, Y, W2=None, W3=None):
    """Compute the gradients of a two-hidden-layer network.

    The output error is taken as ``A3 - Y``, which is the gradient of a
    sigmoid output combined with binary cross-entropy (assumes that is what
    ``loss_function`` implements -- TODO confirm).

    Args:
        forward_params: dict produced by the forward pass. Reads "A3", "A2",
            "Z2", "A1" and "Z1", and "W2" / "W3" when present.
        X: Input samples; used as ``np.dot(dZ1, X)``, so presumably shaped
            (n_samples, n_features) -- confirm against the caller.
        Y: Targets; ``Y.shape[1]`` is used as the number of samples.
        W2: Weight matrix of the second hidden layer. When omitted it is
            looked up in ``forward_params["W2"]``.
        W3: Weight matrix of the output layer. When omitted it is looked up
            in ``forward_params["W3"]``.

    Returns:
        dict with the gradients "dZ3", "dW3", "db3", "dZ2", "dW2", "db2",
        "dZ1", "dW1", "db1".
    """
    import warnings

    A3 = forward_params["A3"]
    A2 = forward_params["A2"]
    Z2 = forward_params["Z2"]
    A1 = forward_params["A1"]
    Z1 = forward_params["Z1"]
    data_size = Y.shape[1]

    if W3 is None:
        W3 = forward_params.get("W3")
    if W2 is None:
        W2 = forward_params.get("W2")
    if W2 is None or W3 is None:
        warnings.warn(
            "backward_propagation: W2/W3 were not supplied; falling back to "
            "the legacy formula that uses dW2/dW3, which yields incorrect "
            "gradients for the hidden layers.",
            RuntimeWarning,
            stacklevel=2,
        )

    # Output layer.
    dZ3 = A3 - Y
    dW3 = np.dot(dZ3, A2.T) / data_size
    db3 = np.sum(dZ3, axis=1) / data_size

    # Second hidden layer. The error must flow back through the weights W3,
    # not through the gradient dW3 (they merely happen to share a shape).
    back_W3 = dW3 if W3 is None else W3
    dZ2 = np.dot(back_W3.T, dZ3) * prime_relu(Z2)
    dW2 = np.dot(dZ2, A1.T) / data_size
    # keepdims keeps the bias gradients as column vectors, like b2 and b1.
    db2 = np.sum(dZ2, axis=1, keepdims=True) / data_size

    # First hidden layer, propagated through W2 for the same reason.
    back_W2 = dW2 if W2 is None else W2
    dZ1 = np.dot(back_W2.T, dZ2) * prime_relu(Z1)
    dW1 = np.dot(dZ1, X) / data_size
    db1 = np.sum(dZ1, axis=1, keepdims=True) / data_size

    return {
        "dZ3": dZ3,
        "dW3": dW3,
        "db3": db3,
        "dZ2": dZ2,
        "dW2": dW2,
        "db2": db2,
        "dZ1": dZ1,
        "dW1": dW1,
        "db1": db1,
    }
编辑:
类似这样。请注意,我在函数 backward_propagation() 的开头添加了两行,之前我忘了加入 A3 和 Z3。
def one_hidden_layer_model(X, y, epochs=1000, learning_rate=0.003):
    """Train a network with two hidden layers of 4 units by gradient descent.

    Despite its name, this variant builds two hidden layers; the name is
    kept so existing callers continue to work.

    Args:
        X: Training samples; ``X.shape[1]`` is used as the input size.
        y: Training targets, passed unchanged to ``loss_function`` and
            ``backward_propagation``.
        epochs: Number of full-batch gradient descent steps.
        learning_rate: Step size of each parameter update.

    Returns:
        Tuple ``(W1, b1, W2, b2, W3, b3)`` with the trained parameters.
    """
    np.random.seed(0)  # Reproducible initialisation.

    # Size the input layer from the data actually passed in, not from a
    # global such as X_train.
    input_size = X.shape[1]
    output_size = 1
    hidden_layer_nodes = 4
    hidden_layer_nodes2 = 4

    # Weights are scaled by 1/sqrt(fan_in); biases start at zero.
    W1 = np.random.randn(hidden_layer_nodes, input_size) / np.sqrt(input_size)
    b1 = np.zeros((hidden_layer_nodes, 1))
    W2 = np.random.randn(hidden_layer_nodes2, hidden_layer_nodes) / np.sqrt(hidden_layer_nodes)
    b2 = np.zeros((hidden_layer_nodes2, 1))
    W3 = np.random.randn(output_size, hidden_layer_nodes2) / np.sqrt(hidden_layer_nodes2)
    b3 = np.zeros((output_size, 1))

    # Sampled losses; kept for inspection only, not part of the return value.
    loss_history = []

    for i in range(epochs):
        forward_params = forward_propagation(X, W1, b1, W2, b2, W3, b3)
        loss = loss_function(forward_params["A3"], y)
        grads = backward_propagation(forward_params, X, y)

        W1 -= learning_rate * grads["dW1"]
        b1 -= learning_rate * grads["db1"]
        W2 -= learning_rate * grads["dW2"]
        b2 -= learning_rate * grads["db2"]
        W3 -= learning_rate * grads["dW3"]
        b3 -= learning_rate * grads["db3"]

        # Report the loss every 1000 epochs.
        if i % 1000 == 0:
            loss_history.append(loss)
            print("Costo e iteracion %i: %f" % (i, loss))

    return W1, b1, W2, b2, W3, b3