Since I am trying to understand the mechanisms that different neural network models are based on, I decided to implement an Adam Optimizer using only numpy, without any additional framework. I start by shuffling the dataset:

import math
import numpy as np

def shuffling(X_Train, Y_Train, m, minibatch_size):
    # Shuffle the columns (examples) of X and Y with the same permutation
    permutation = list(np.random.permutation(m))
    shuffled_X = X_Train[:, permutation]
    shuffled_Y = Y_Train[:, permutation]
    # Number of minibatches, including a final partial one when m is not
    # divisible by minibatch_size (math.floor(...) + 1 would produce an
    # empty extra minibatch whenever m divides evenly)
    n_comp_minibatches = math.ceil(m / minibatch_size)
    minibatches = [(shuffled_X[:, i*minibatch_size:(i+1)*minibatch_size],
                    shuffled_Y[:, i*minibatch_size:(i+1)*minibatch_size])
                   for i in range(n_comp_minibatches)]
    return minibatches
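To check the minibatch construction, a quick sanity test on synthetic data can be run (the shapes and sizes below are arbitrary, chosen only for illustration):

X = np.random.randn(4, 10)   # 4 features, 10 examples
Y = np.random.randn(1, 10)
minibatches = shuffling(X, Y, m=10, minibatch_size=3)
# Expect 4 minibatches: three full ones of 3 examples, one partial of 1
for mb_X, mb_Y in minibatches:
    print(mb_X.shape, mb_Y.shape)
# (4, 3) (1, 3) ... (4, 1) (1, 1)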
I then went on to implement the full model with the following procedure:
import matplotlib.pyplot as plt

def AdamModel(X_Train, Y_Train, lay_size, learning_rate, minibatch_size,
              beta1, beta2, epsilon, n_epoch, print_cost=False):
    # Implements the complete model
    # Includes shuffling of minibatches at each epoch
    L = len(lay_size)
    costs = []
    t = 0  # Adam update counter, incremented at each minibatch update
    m = X_Train.shape[1]
    # Initialization of parameters
    parameters = initialize_parameters(lay_size)
    # Initialization of v, s for Adam
    v, s = initialize_Adam(parameters)
    # Iterates the procedure for n_epoch
    for n in range(n_epoch):
        # Permutation of X_Train, Y_Train and creation of minibatches
        minibatches = shuffling(X_Train, Y_Train, m, minibatch_size)
        # Iterate the forward-backward procedure for each minibatch
        for minibatch in minibatches:
            # Unpacking of minibatch content
            (minibatch_X, minibatch_Y) = minibatch
            # Forward-prop for the minibatch
            AL, caches = L_lay_forw(minibatch_X, parameters)  # <--- error raised here
            # Computes the cost associated to the output of the minibatch
            cost = compute_cost(AL, minibatch_Y)
            # Computation of gradients
            grads = L_lay_back(minibatch_Y, AL, caches)
            # Parameters updating procedure
            t += 1
            parameters = upd_para_adam(parameters, grads, v, s, t,
                                       learning_rate, beta1, beta2, epsilon)
        if print_cost and n % 20 == 0:
            print("Cost after epoch %i: %f" % (n, cost))
            costs.append(cost)
    # Plot the cost curve for the training run
    plt.plot(costs)
    plt.ylabel("Cost")
    plt.xlabel("Epoch (per twentieth)")
    plt.title("Learning rate: " + str(learning_rate))
    plt.show()
    return parameters
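For context, upd_para_adam is meant to apply the standard Adam update with bias correction. A minimal sketch of what such a function might look like is below; the "dW1", "db1", ... key naming and the dictionary layout of grads, v and s are assumptions on my part, chosen to match how they are used above:

def upd_para_adam(parameters, grads, v, s, t, learning_rate,
                  beta1, beta2, epsilon):
    # Hypothetical sketch, assuming v and s hold one entry per gradient key
    L = len(parameters) // 2
    for l in range(1, L + 1):
        for key in ("W" + str(l), "b" + str(l)):
            dkey = "d" + key
            # Exponentially weighted first moment (momentum)
            v[dkey] = beta1 * v[dkey] + (1 - beta1) * grads[dkey]
            # Exponentially weighted second moment (squared gradients)
            s[dkey] = beta2 * s[dkey] + (1 - beta2) * grads[dkey] ** 2
            # Bias-corrected estimates (t is the running update counter)
            v_corr = v[dkey] / (1 - beta1 ** t)
            s_corr = s[dkey] / (1 - beta2 ** t)
            # Parameter step
            parameters[key] = parameters[key] - learning_rate * v_corr / (np.sqrt(s_corr) + epsilon)
    return parameters

Note that this sketch returns only the parameters dict, which is what the single-name assignment parameters = upd_para_adam(...) in the training loop above expects.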
I am getting the following error: "TypeError: tuple indices must be integers or slices, not str". The traceback points to "AL, caches = L_lay_forw(minibatch_X, parameters)" -> "A, cache = linear_act_forward(A_prev, parameters["W"+str(l)], parameters["b"+str(l)], act)", but I cannot understand why.
def L_lay_forw(X, parameters):
    # Iterates the linear_act_forward process across the entire architecture
    # Stores all the "cache" in a caches list
    # Stores all the activations in an A_l list
    caches = []
    A_l = []
    L = len(parameters) // 2
    A = X
    for l in range(1, L + 1):
        A_prev = A
        # ReLU on the hidden layers, Softmax on the output layer
        if l != L:
            act = "ReLu"
        else:
            act = "Softmax"
        A, cache = linear_act_forward(A_prev, parameters["W" + str(l)],
                                      parameters["b" + str(l)], act)
        caches.append(cache)
        A_l.append(A)
    AL = A
    return AL, caches  # the caller unpacks two values
Specifically, I cannot work out which of the objects involved is behaving like a tuple that would require integer indexing. Could anyone help me?
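For reference, the message itself is what Python raises whenever a plain tuple is indexed with a string; a minimal example, unrelated to my code, just to show where the wording comes from:

pair = ({"W1": 1}, {"W1": 2})  # a tuple of dicts
pair["W1"]
# TypeError: tuple indices must be integers or slices, not str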