I am trying to adapt the example at http://cs231n.github.io/neural-networks-case-study/#together to build a neural network for a numeric target variable, so it becomes a neural network doing regression. I must have made some mistake in the derivative part, because my loss grows wildly. Here is the code:
h = neurons  # size of hidden layer
D = X[0].size
K = 1
W = 0.01 * np.random.randn(D, h)
b = np.zeros((1, h))
W2 = 0.01 * np.random.randn(h, K)
b2 = np.zeros((1, K))

# some hyperparameters
step_size = 1  # learning rate
reg = 0.001  # regularization strength

loss_vec = []

# gradient descent loop
num_examples = X.shape[0]
for i in xrange(1000):
    # evaluate predicted scores, [N x K]
    hidden_layer = np.maximum(0, np.dot(X, W) + b)  # note, ReLU activation
    scores = np.dot(hidden_layer, W2) + b2

    # squared-error loss per example
    loss = np.power(y - scores, 2)
    #if i % 50 == 0:
    loss_vec.append(np.mean(np.abs(loss)))
    print "iteration %d: loss %f" % (i, np.mean(np.abs(loss)))

    # compute the gradient on scores
    dscores = 2 * (y - scores)  # here I am not sure this is correct

    # backpropagate the gradient to the parameters
    # first backprop into parameters W2 and b2
    dW2 = np.dot(hidden_layer.T, dscores)
    db2 = np.sum(dscores, axis=0, keepdims=True)

    # next backprop into hidden layer
    dhidden = np.dot(dscores, W2.T)

    # backprop the ReLU non-linearity
    dhidden[hidden_layer <= 0] = 0

    # finally into W, b
    dW = np.dot(X.T, dhidden)
    db = np.sum(dhidden, axis=0, keepdims=True)

    # add regularization gradient contribution
    dW2 += reg * W2
    dW += reg * W

    # perform a parameter update
    W += -step_size * dW
    b += -step_size * db
    W2 += -step_size * dW2
    b2 += -step_size * db2
The code outputs:
iteration 0: loss 5786.021888
iteration 1: loss 24248543152533318464172949461134213120.000000
iteration 2: loss 388137710832824223006297769344993376570435619092
Answer (score 0):
I noticed several important mistakes:

- The learning rate (step_size) is too large. I used 0.0005, but the right value depends on the data, the size of the hidden layer, etc.
- The gradient dscores should be scores - y.
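To see why the sign matters, here is a minimal finite-difference check. It is not part of the original question or answer; loss_fn, eps, and the random y/scores are purely illustrative. It compares both candidate gradients against a numerical estimate of the question's loss np.power(y - scores, 2):

import numpy as np

np.random.seed(1)
y = np.random.randn(5, 1)       # toy targets
scores = np.random.randn(5, 1)  # toy predictions

def loss_fn(s):
    # same loss as in the question: squared error, summed over examples
    return np.sum(np.power(y - s, 2))

analytic = 2 * (scores - y)  # d/ds of sum((y - s)^2)

# central finite differences
eps = 1e-6
numeric = np.zeros_like(scores)
for i in range(scores.size):
    s_plus = scores.copy()
    s_minus = scores.copy()
    s_plus.flat[i] += eps
    s_minus.flat[i] -= eps
    numeric.flat[i] = (loss_fn(s_plus) - loss_fn(s_minus)) / (2 * eps)

print np.max(np.abs(analytic - numeric))          # ~1e-9: 2*(scores - y) matches
print np.max(np.abs(2 * (y - scores) - numeric))  # large: the question's 2*(y - scores) points the wrong way

With the wrong sign, the update W += -step_size * dW actually moves the parameters uphill, which is exactly the runaway loss shown in the question. scores - y is this gradient up to a constant factor of 2, which the learning rate absorbs.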
The complete code follows:
import numpy as np

# Generate data: learn the sum x[0] + x[1]
np.random.seed(0)
N = 100
D = 2

X_test = np.zeros([N, D])
y = np.zeros([N, 1])
for i in range(N):
    X_test[i, :] = np.random.random_integers(0, 4, size=2)
    y[i] = X_test[i, 0] + X_test[i, 1]

# Network params
H = 10
W = 0.01 * np.random.randn(D, H)
b = np.zeros([1, H])
W2 = 0.01 * np.random.randn(H, 1)
b2 = np.zeros([1, 1])

# Hyper params
step_size = 0.0005
reg = 0.001

for i in xrange(100):
    # forward pass
    hidden_layer = np.maximum(0, np.dot(X_test, W) + b)
    scores = np.dot(hidden_layer, W2) + b2

    # mean squared error plus L2 regularization
    reg_loss = 0.5 * reg * np.sum(W * W) + 0.5 * reg * np.sum(W2 * W2)
    loss = np.mean(np.power(y - scores, 2)) + reg_loss
    print "iteration %d: loss %f" % (i, loss)

    # backward pass
    dscores = (scores - y)
    dW2 = np.dot(hidden_layer.T, dscores)
    db2 = np.sum(dscores, axis=0, keepdims=True)
    dhidden = np.dot(dscores, W2.T)
    dhidden[hidden_layer <= 0] = 0
    dW = np.dot(X_test.T, dhidden)
    db = np.sum(dhidden, axis=0, keepdims=True)

    # regularization gradient
    dW2 += reg * W2
    dW += reg * W

    # parameter update
    W += -step_size * dW
    b += -step_size * db
    W2 += -step_size * dW2
    b2 += -step_size * db2
# Test
X_test = np.array([[1, 0], [0, 1], [2, 3], [2, 2]]).reshape([-1, 2])
y_test = np.array([1, 1, 5, 4]).reshape([-1, 1])
hidden_layer = np.maximum(0, np.dot(X_test, W) + b)
scores = np.dot(hidden_layer, W2) + b2
print 'Average test error = %f' % np.mean((scores - y_test).T)
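One small caveat about the last line: np.mean((scores - y_test).T) averages signed errors, so over- and under-predictions can cancel out. A minimal variation, not part of the original answer, that reports the mean absolute error instead:

# mean absolute error on the test set, so errors of opposite sign cannot cancel
mae = np.mean(np.abs(scores - y_test))
print 'Mean absolute test error = %f' % mae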