我的神经网络没有收敛:虽然每次迭代报告的损失都在变小,但损失越小,网络输出的数值也越低(例如,报告损失为 0.003 时,对正弦值的预测通常小于 0.01)。
我已经尝试过调整学习率和迭代次数,但是更多的迭代会导致预测的幅度甚至更低,准确度也会大大降低。
'''
# Build a training set of 20,000 (x, sin(x)) pairs, stored as (1, m) row
# vectors so each column is one example (matches the layers below).
# (The original loop header was machine-translated Chinese — "对于范围内的i"
# — which is not valid Python; restored to `for i in range(...)`.)
X = np.zeros((1, 20000))
Y = np.zeros((1, 20000))
# global parameters: X and Y are read directly by nn_model below
for i in range(np.shape(X)[1]):
    # NOTE(review): randint draws INTEGER angles in [1, 90), interpreted as
    # radians — inputs are sparse and unscaled; confirm this is intended.
    X[0][i] = np.random.randint(1, high=90)
    Y[0][i] = np.sin(X[0][i])
    # print(X[0][i], Y[0][i])
def initialize(n_x, n_y, n_h):
    """Create the parameter dict for a one-hidden-layer network.

    n_x / n_y / n_h: input, output and hidden layer sizes.
    Weights start as small random values to break symmetry; biases at 0.
    (The original def name was machine-translated to `初始化`, which broke
    the `initialize(...)` call in nn_model — restored.)
    """
    W1 = np.random.randn(n_h, n_x) * 0.01
    b1 = np.zeros((n_h, 1))  # the original's `zeros * 0.01` was a no-op
    W2 = np.random.randn(n_y, n_h) * 0.01
    b2 = np.zeros((n_y, 1))
    return {
        "W1": W1,
        "b1": b1,
        "W2": W2,
        "b2": b2
    }
def sigmoid(z):
    """Element-wise logistic function 1 / (1 + exp(-z))."""
    # Lower-case name matches the call site in forward_propagate — the
    # original defined `Sigmoid` but called `sigmoid`, raising NameError.
    return 1 / (1 + np.exp(-z))

# Backward-compatible alias for any caller using the original name.
Sigmoid = sigmoid
def forward_propagate(X, parameters):
    """One forward pass: tanh hidden layer, sigmoid output layer.

    Returns (A1, A2): hidden activations and network output, one column
    per example.
    NOTE(review): a sigmoid output lies in (0, 1) and can never produce
    the negative half of sin(x) in [-1, 1]; a linear output unit (and an
    MSE loss) would suit this regression task better.
    """
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    Z1 = np.dot(W1, X) + b1
    A1 = np.tanh(Z1)
    Z2 = np.dot(W2, A1) + b2
    # Use the capitalised name the file actually defines — the original
    # called `sigmoid`, which raised NameError at runtime.
    A2 = Sigmoid(Z2)
    return A1, A2
def compute_cost(A2, Y):
    """Mean binary cross-entropy between predictions A2 and targets Y.

    A2 is clipped away from exactly 0/1 so np.log never yields -inf/NaN.
    NOTE(review): cross-entropy assumes targets in [0, 1]; the sin targets
    here lie in [-1, 1], so mean squared error would be the appropriate
    loss for this task.
    """
    m = np.shape(Y)[1]
    eps = 1e-12  # guard against log(0)
    A2 = np.clip(A2, eps, 1 - eps)
    cost = -(1 / m) * np.sum((Y * np.log(A2)) + ((1 - Y) * np.log(1 - A2)))
    return cost
def back_propagate(X, parameters, A1, A2, Y):
    """Gradients of the cross-entropy loss w.r.t. W1, b1, W2, b2.

    Assumes a tanh hidden layer and sigmoid output, so dZ2 = A2 - Y.
    Returns a dict {"dW1", "db1", "dW2", "db2"}.
    """
    W2 = parameters["W2"]
    m_inv = 1 / np.shape(X)[1]  # 1/m, m = number of examples
    dZ2 = A2 - Y
    # BUG FIX: dW2 must contract dZ2 with the HIDDEN activations A1 — the
    # original used A2.T, producing a (n_y, n_y) array that silently
    # broadcast across all of W2 during the update step.
    dW2 = m_inv * np.dot(dZ2, A1.T)
    db2 = m_inv * np.sum(dZ2, axis=1, keepdims=True)
    # Proper matrix product W2.T @ dZ2 — elementwise `W2.T * dZ2` only
    # coincided with it for the special case n_y = 1.
    dZ1 = np.dot(W2.T, dZ2) * (1 - np.power(A1, 2))
    dW1 = m_inv * np.dot(dZ1, X.T)
    db1 = m_inv * np.sum(dZ1, axis=1, keepdims=True)
    return {
        "dW1": dW1,
        "db1": db1,
        "dW2": dW2,
        "db2": db2
    }
def update(grads, parameters, learning_rate):
    """Return a new parameter dict after one gradient-descent step.

    (The original def line was machine-translated to `更新(成绩, 参数, ...)`
    while the body read `grads`/`parameters`, so every lookup raised
    NameError — restored the English names used by the call in nn_model.)
    """
    new_params = {}
    for key in ("W1", "b1", "W2", "b2"):
        # Each parameter moves against its gradient, scaled by the rate.
        new_params[key] = parameters[key] - learning_rate * grads["d" + key]
    return new_params
def nn_model(n_x, n_y, n_h, iterations, learning_rate):
    """Train the one-hidden-layer network on the module-level X, Y.

    Returns the learned parameter dict. (The original parameter names were
    machine-translated to `迭代次数`/`学习率` while the body used
    `iterations`/`learning_rate` — restored so the body resolves.)
    """
    parameters = initialize(n_x, n_y, n_h)
    for i in range(iterations):
        A1, A2 = forward_propagate(X, parameters)
        cost = compute_cost(A2, Y)
        grads = back_propagate(X, parameters, A1, A2, Y)
        # BUG FIX: the original stepped by `learning_rate * cost`, so the
        # effective step vanished as the loss shrank — exactly the
        # "predictions collapse toward 0" symptom being debugged.
        parameters = update(grads, parameters, learning_rate)
        # BUG FIX: `cost % 1000 == 0` almost never holds for a float;
        # log every 1000th ITERATION instead.
        if i % 1000 == 0:
            print(cost)
    return parameters
def predict(X, parameters):
    """Return the network output A2 for input X (forward pass only).

    (The original def line was machine-translated — its parameter `参数`
    never matched the `parameters` used in the body; restored.)
    """
    A1, A2 = forward_propagate(X, parameters)
    return A2
# Train: 1 input, 1 output, 50 hidden units, 100 iterations, lr 0.2.
# (Full-width commas `、` from machine translation made the original line
# a SyntaxError; restored ASCII syntax.)
parameters = nn_model(1, 1, 50, 100, 0.2)
# NOTE(review): 100 iterations is tiny for 20k examples — expect underfit.
print(predict(45, parameters))
'''
输出的预测为'''array([[0.01085812]])'''