如何在ANN中再添加一个隐藏层?添加之后准确率很低

时间:2019-12-30 09:18:30

标签: python python-3.x scikit-learn neural-network

我正在按照这篇指南使用“月亮”(moons)数据集:https://vincentblog.xyz/posts/neural-networks-from-scratch-in-python 。我想再增加一个隐藏层(同样是4个神经元),应该如何扩展呢?再增加一个隐藏层之后,前向传播和反向传播部分让我特别困惑。以下代码仅适用于一个隐藏层:

def forward_propagation(X, W1, b1, W2, b2):
    """Run the forward pass of the single-hidden-layer network.

    X is transposed before the first matrix product, so the intermediate
    arrays are laid out with one column per row of X (assumes X is
    (n_samples, n_features) -- TODO confirm against the caller).

    Returns a dict with the keys "Z1", "A1", "Z2" and "A2": the
    pre-activation and activation of the hidden layer (through ``relu``)
    and of the output layer (through ``sigmoid``).
    """
    hidden_pre = np.dot(W1, X.T) + b1
    hidden_act = relu(hidden_pre)

    output_pre = np.dot(W2, hidden_act) + b2
    output_act = sigmoid(output_pre)

    return {
        "Z1": hidden_pre,
        "A1": hidden_act,
        "Z2": output_pre,
        "A2": output_act,
    }
def backward_propagation(forward_params, X, Y, W2=None):
    """Compute the gradients of the single-hidden-layer network.

    Args:
        forward_params: dict produced by the forward pass; the keys "A2",
            "A1" and "Z1" are read.
        X: input data; multiplied as ``dZ1 @ X`` (assumes rows are samples
            -- TODO confirm against the caller).
        Y: targets; ``Y.shape[1]`` is used as the number of samples, so Y
            must be 2-D.
        W2: optional output-layer weight matrix. When given, the hidden
            layer error is back-propagated through ``W2.T``, which is what
            the chain rule requires. When omitted, the function keeps its
            historical behaviour and uses the gradient ``dW2.T`` instead;
            that has the right shape but is not the true derivative, so
            callers should pass ``W2`` for correct training.

    Returns:
        dict with the keys "dZ2", "dW2", "db2", "dZ1", "dW1", "db1".
        Both bias gradients are column vectors of shape (units, 1).
    """
    A2 = forward_params["A2"]
    A1 = forward_params["A1"]
    Z1 = forward_params["Z1"]

    data_size = Y.shape[1]

    # Output layer. NOTE(review): ``A2 - Y`` presumably pairs the sigmoid
    # output with a cross-entropy loss -- confirm against loss_function.
    dZ2 = A2 - Y
    dW2 = np.dot(dZ2, A1.T) / data_size
    # keepdims keeps the bias gradient as a column so the in-place update
    # ``b2 -= lr * db2`` also works with more than one output unit.
    db2 = np.sum(dZ2, axis=1, keepdims=True) / data_size

    # Hidden layer: propagate the error through the weights themselves
    # when they are available; fall back to the legacy dW2 otherwise.
    back_weights = dW2 if W2 is None else W2
    dZ1 = np.dot(back_weights.T, dZ2) * prime_relu(Z1)
    dW1 = np.dot(dZ1, X) / data_size
    db1 = np.sum(dZ1, axis=1, keepdims=True) / data_size

    grads = {
        "dZ2": dZ2,
        "dW2": dW2,
        "db2": db2,
        "dZ1": dZ1,
        "dW1": dW1,
        "db1": db1,
    }

    return grads

还需要修改主函数:

def one_hidden_layer_model(X, y, epochs=1000, learning_rate=0.003):
    """Train a network with one 4-unit hidden layer by gradient descent.

    Args:
        X: training inputs; ``X.shape[1]`` is taken as the input size.
        y: training targets, passed unchanged to ``loss_function`` and
            ``backward_propagation``.
        epochs: number of full-batch update steps.
        learning_rate: step size applied to every gradient.

    Returns:
        The trained parameters as the tuple ``(W1, b1, W2, b2)``.
    """
    np.random.seed(0)
    # Size the first layer from the data actually passed in, not from a
    # module-level X_train that may be missing or differently shaped.
    input_size = X.shape[1]
    output_size = 1
    hidden_layer_nodes = 4

    # Random weights scaled by 1/sqrt(fan_in); biases start at zero.
    W1 = np.random.randn(hidden_layer_nodes, input_size) / np.sqrt(input_size)
    b1 = np.zeros((hidden_layer_nodes, 1))
    W2 = np.random.randn(output_size, hidden_layer_nodes) / np.sqrt(hidden_layer_nodes)
    b2 = np.zeros((output_size, 1))

    # Sampled losses; collected for inspection but not returned.
    loss_history = []

    for i in range(epochs):
        forward_params = forward_propagation(X, W1, b1, W2, b2)
        A2 = forward_params["A2"]
        loss = loss_function(A2, y)

        grads = backward_propagation(forward_params, X, y)

        # In-place gradient-descent step on every parameter.
        W1 -= learning_rate * grads["dW1"]
        b1 -= learning_rate * grads["db1"]
        W2 -= learning_rate * grads["dW2"]
        b2 -= learning_rate * grads["db2"]

        # Report the loss every 1000 iterations (including iteration 0).
        if i % 1000 == 0:
            loss_history.append(loss)
            print("Costo e iteracion %i: %f" % (i, loss))

    return W1, b1, W2, b2

按照 C. Leconte 的回答修改之后,模型的准确率非常低。下面是预测部分的代码:

def predict(W1, b1, W2, b2, W3, b3, X):
    """Return hard 0/1 predictions for X from the trained parameters.

    Runs ``forward_propagation`` and thresholds the first row of the
    output activation "A3": entries strictly greater than 0.5 become 1.0,
    all others 0.0.

    Returns:
        Float array of shape ``(1, A3.shape[1])``.
    """
    forward_params = forward_propagation(X, W1, b1, W2, b2, W3, b3)
    A3 = forward_params["A3"]

    # Vectorised threshold instead of a per-element Python loop.
    y_prediction = np.where(A3[0] > 0.5, 1.0, 0.0).reshape(1, -1)

    return y_prediction
# Hard 0/1 predictions for the training and validation splits.
train_predictions = predict(W1, b1, W2, b2, W3, b3, X_train)
validation_predictions = predict(W1, b1, W2, b2, W3, b3, X_val)

# Accuracy is reported as 100 minus the mean absolute error in percent.
# NOTE(review): predictions have shape (1, n); this assumes the labels are
# (1, n) or (n,). A column vector (n, 1) would broadcast to (n, n) and
# distort the reported accuracy -- confirm the label shapes.
train_error_pct = np.mean(np.abs(train_predictions - y_train)) * 100
print("train accuracy: {} %".format(100 - train_error_pct))

validation_error_pct = np.mean(np.abs(validation_predictions - y_val)) * 100
print("test accuracy: {} %".format(100 - validation_error_pct))

我尝试了不同的学习率,但准确率最高也只有 50% 多一点。

1 个答案:

答案 0 :(得分:1)

def forward_propagation(X, W1, b1, W2, b2, W3, b3):
    """Run the forward pass of the network with two hidden layers.

    X is transposed before the first matrix product, so the intermediate
    arrays are laid out with one column per row of X (assumes X is
    (n_samples, n_features) -- TODO confirm against the caller).

    Returns a dict with the keys "Z1", "A1", "Z2", "A2", "Z3" and "A3":
    the pre-activation and activation of both hidden layers (through
    ``relu``) and of the output layer (through ``sigmoid``).
    """
    first_pre = np.dot(W1, X.T) + b1
    first_act = relu(first_pre)

    second_pre = np.dot(W2, first_act) + b2
    second_act = relu(second_pre)

    output_pre = np.dot(W3, second_act) + b3
    output_act = sigmoid(output_pre)

    return {
        "Z1": first_pre,
        "A1": first_act,
        "Z2": second_pre,
        "A2": second_act,
        "Z3": output_pre,
        "A3": output_act,
    }



def backward_propagation(forward_params, X, Y, W2=None, W3=None):
    """Compute the gradients of the network with two hidden layers.

    Args:
        forward_params: dict produced by the forward pass; the keys "A3",
            "A2", "A1", "Z2" and "Z1" are read.
        X: input data; multiplied as ``dZ1 @ X`` (assumes rows are samples
            -- TODO confirm against the caller).
        Y: targets; ``Y.shape[1]`` is used as the number of samples, so Y
            must be 2-D.
        W2: optional weight matrix of the second hidden layer.
        W3: optional weight matrix of the output layer.

        When a weight matrix is given, the error is back-propagated
        through its transpose, which is what the chain rule requires.
        When it is omitted, the function keeps its historical behaviour
        and uses the corresponding gradient (``dW3.T`` / ``dW2.T``)
        instead; that has the right shape but is not the true derivative,
        so callers should pass both matrices for correct training.

    Returns:
        dict with the keys "dZ3", "dW3", "db3", "dZ2", "dW2", "db2",
        "dZ1", "dW1", "db1". All bias gradients are column vectors of
        shape (units, 1).
    """
    A3 = forward_params["A3"]
    A2 = forward_params["A2"]
    Z2 = forward_params["Z2"]
    A1 = forward_params["A1"]
    Z1 = forward_params["Z1"]

    data_size = Y.shape[1]

    # Output layer. NOTE(review): ``A3 - Y`` presumably pairs the sigmoid
    # output with a cross-entropy loss -- confirm against loss_function.
    dZ3 = A3 - Y
    dW3 = np.dot(dZ3, A2.T) / data_size
    # keepdims keeps every bias gradient as a column so the in-place
    # update ``b -= lr * db`` also works with more than one unit.
    db3 = np.sum(dZ3, axis=1, keepdims=True) / data_size

    # Second hidden layer: propagate through W3 when it is available.
    back_w3 = dW3 if W3 is None else W3
    dZ2 = np.dot(back_w3.T, dZ3) * prime_relu(Z2)
    dW2 = np.dot(dZ2, A1.T) / data_size
    db2 = np.sum(dZ2, axis=1, keepdims=True) / data_size

    # First hidden layer: propagate through W2 when it is available.
    back_w2 = dW2 if W2 is None else W2
    dZ1 = np.dot(back_w2.T, dZ2) * prime_relu(Z1)
    dW1 = np.dot(dZ1, X) / data_size
    db1 = np.sum(dZ1, axis=1, keepdims=True) / data_size

    grads = {
        "dZ3": dZ3,
        "dW3": dW3,
        "db3": db3,
        "dZ2": dZ2,
        "dW2": dW2,
        "db2": db2,
        "dZ1": dZ1,
        "dW1": dW1,
        "db1": db1,
    }

    return grads

编辑:

大致如下。请注意,我在函数 backward_propagation() 的开头添加了两行:之前我忘了取出 A3 和 Z3。

def one_hidden_layer_model(X, y, epochs=1000, learning_rate=0.003):
    """Train a network with two 4-unit hidden layers by gradient descent.

    Despite its name, this version builds two hidden layers (W1/b1 and
    W2/b2) followed by a single-unit output layer (W3/b3).

    Args:
        X: training inputs; ``X.shape[1]`` is taken as the input size.
        y: training targets, passed unchanged to ``loss_function`` and
            ``backward_propagation``.
        epochs: number of full-batch update steps.
        learning_rate: step size applied to every gradient.

    Returns:
        The trained parameters as the tuple ``(W1, b1, W2, b2, W3, b3)``.
    """
    np.random.seed(0)
    # Size the first layer from the data actually passed in, not from a
    # module-level X_train that may be missing or differently shaped.
    input_size = X.shape[1]
    output_size = 1
    hidden_layer_nodes = 4
    hidden_layer_nodes2 = 4

    # Random weights scaled by 1/sqrt(fan_in); biases start at zero.
    W1 = np.random.randn(hidden_layer_nodes, input_size) / np.sqrt(input_size)
    b1 = np.zeros((hidden_layer_nodes, 1))
    W2 = np.random.randn(hidden_layer_nodes2, hidden_layer_nodes) / np.sqrt(hidden_layer_nodes)
    b2 = np.zeros((hidden_layer_nodes2, 1))
    W3 = np.random.randn(output_size, hidden_layer_nodes2) / np.sqrt(hidden_layer_nodes2)
    b3 = np.zeros((output_size, 1))

    # Sampled losses; collected for inspection but not returned.
    loss_history = []

    for i in range(epochs):
        forward_params = forward_propagation(X, W1, b1, W2, b2, W3, b3)
        A3 = forward_params["A3"]
        loss = loss_function(A3, y)

        grads = backward_propagation(forward_params, X, y)

        # In-place gradient-descent step on every parameter.
        W1 -= learning_rate * grads["dW1"]
        b1 -= learning_rate * grads["db1"]
        W2 -= learning_rate * grads["dW2"]
        b2 -= learning_rate * grads["db2"]
        W3 -= learning_rate * grads["dW3"]
        b3 -= learning_rate * grads["db3"]

        # Report the loss every 1000 iterations (including iteration 0).
        if i % 1000 == 0:
            loss_history.append(loss)
            print("Costo e iteracion %i: %f" % (i, loss))

    return W1, b1, W2, b2, W3, b3