I am trying to recreate, i.e. re-run, a previous neural network. My idea is to save the weights and biases the network is initialized with in run (1), and to initialize the network in run (2) with exactly those saved weights and biases, so that run (2) produces the same results as run (1). The relevant code is below.
However, I cannot get this to work. If I re-run the network with use_stored_weights set to True (which prompts neural_network_model to restore the previously saved weights and biases and, I hope, to initialize the network with them), I do indeed always get the same results, but they are not the results of the run I am trying to reproduce (in fact, they are much worse). What is equally strange is that I always get these same results, so they seem to be unrelated to the stored weights and biases.
The first thing I checked was whether I restore my weights and biases correctly (in neural_network_model), and that is the case.
I am not sure what is going on here. I have two suspicions, and I don't know how to check either of them: the correct weights and biases are restored in neural_network_model, but the network is not actually initialized with them. Is the sess.run(init) command in train_neural_network somehow responsible for this? ... But it could be something completely different. Any help is much appreciated!
UPDATE: I have figured out what is happening: when use_stored_weights is set to True, the weights are always initialized to all zeros. As far as I can tell, this is due to the sess.run(init) in train_neural_network. However, if I leave out that line, I get an error:
FailedPreconditionError: Attempting to use uninitialized value b2
So I guess my question now is: how do I make the restored weights and biases accessible in train_neural_network?
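To illustrate what I believe is going on, here is a minimal sketch (TF 1.x; the variable name and checkpoint path are made up). In TF 1.x, variable values live in a session, not in the graph: values saved or restored in one session are invisible in every other session, and running the initializer overwrites them with the initializer's value, which in my restore branch is zeros:

import tensorflow as tf

v = tf.get_variable('v', [], initializer=tf.zeros_initializer())
set_v = v.assign(5.0)  # stand-in for 'trained' values
saver = tf.train.Saver()

with tf.Session() as sess1:
    sess1.run(tf.global_variables_initializer())
    sess1.run(set_v)                     # v == 5.0, but only in sess1
    saver.save(sess1, '/tmp/demo.ckpt')  # made-up path

with tf.Session() as sess2:
    saver.restore(sess2, '/tmp/demo.ckpt')
    print(sess2.run(v))                  # 5.0: the restore works in sess2

with tf.Session() as sess3:
    sess3.run(tf.global_variables_initializer())
    print(sess3.run(v))                  # 0.0: init overwrote the value

This seems to be exactly the pattern in my code: neural_network_model restores the checkpoint into its own short-lived session, that session closes, and train_neural_network then opens a fresh session and runs init, so the network starts from zeros.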
The imports:
import tensorflow as tf
import numpy as np
from numpy import genfromtxt
This function builds the neural network:
def neural_network_model(data, layer_sizes, use_stored_weights):
    num_layers = len(layer_sizes) - 1  # hidden and output layers
    weights = {}
    biases = {}
    # initialise the weights
    # (a) create new weights and biases
    if not use_stored_weights:
        for i in range(num_layers):
            w_name = 'W' + str(i+1)
            b_name = 'b' + str(i+1)
            weights[w_name] = tf.get_variable(w_name, [layer_sizes[i], layer_sizes[i+1]],
                initializer=tf.contrib.layers.xavier_initializer(), dtype=tf.float32)
            biases[b_name] = tf.get_variable(b_name, [layer_sizes[i+1]],
                initializer=tf.zeros_initializer(), dtype=tf.float32)
        # save weights and biases
        saver = tf.train.Saver()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            save_path = saver.save(sess, fold_path + 'weights/' + 'weights.ckpt')
    # (b) restore weights and biases
    else:
        for i in range(num_layers):
            # prepare variables
            w_name = 'W' + str(i+1)
            b_name = 'b' + str(i+1)
            weights[w_name] = tf.get_variable(w_name, [layer_sizes[i], layer_sizes[i+1]],
                initializer=tf.zeros_initializer(), dtype=tf.float32)
            biases[b_name] = tf.get_variable(b_name, [layer_sizes[i+1]],
                initializer=tf.zeros_initializer(), dtype=tf.float32)
        saver = tf.train.Saver()
        with tf.Session() as sess:
            saver.restore(sess, fold_path + 'weights/' + 'weights.ckpt')
    # calculate linear and relu outputs for hidden layers
    a_prev = data
    for i in range(len(weights)-1):
        z = tf.add(tf.matmul(a_prev, weights['W' + str(i+1)]), biases['b' + str(i+1)])
        a = tf.nn.relu(z)
        a_r = tf.nn.dropout(a, keep_prob)
        a_prev = a_r
    # calculate linear output for output layer
    z_o = tf.add(tf.matmul(a_prev, weights['W' + str(len(weights))]), biases['b' + str(len(weights))])
    return z_o
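One restructuring I am considering (only a sketch under my assumptions; neural_network_model_v2 is a made-up name): create the variables and the Saver here, but do not open any session, and return the saver together with the output op so that the caller can save or restore inside its own session. The initializers then no longer need to depend on use_stored_weights, because a later restore overwrites whatever init produced:

def neural_network_model_v2(data, layer_sizes):
    # build the variables exactly as before, but open no tf.Session() here
    num_layers = len(layer_sizes) - 1
    weights, biases = {}, {}
    for i in range(num_layers):
        w_name, b_name = 'W' + str(i+1), 'b' + str(i+1)
        weights[w_name] = tf.get_variable(w_name, [layer_sizes[i], layer_sizes[i+1]],
            initializer=tf.contrib.layers.xavier_initializer(), dtype=tf.float32)
        biases[b_name] = tf.get_variable(b_name, [layer_sizes[i+1]],
            initializer=tf.zeros_initializer(), dtype=tf.float32)
    # forward pass, unchanged
    a_prev = data
    for i in range(num_layers - 1):
        z = tf.add(tf.matmul(a_prev, weights['W' + str(i+1)]), biases['b' + str(i+1)])
        a_prev = tf.nn.dropout(tf.nn.relu(z), keep_prob)
    z_o = tf.add(tf.matmul(a_prev, weights['W' + str(num_layers)]), biases['b' + str(num_layers)])
    # the saver is created here (it only covers W*/b*) but used by the caller
    saver = tf.train.Saver()
    return z_o, saver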
This function trains and evaluates the network:
def train_neural_network(x, layer_sizes, use_stored_weights):
    prediction = neural_network_model(x, layer_sizes, use_stored_weights)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
    optimizer = tf.train.AdamOptimizer(learning_rate=lrn_rate).minimize(cost)
    softm = tf.nn.softmax(prediction)
    pred_class = tf.argmax(softm)
    costs = []
    correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        for epoch in range(epochs):
            epoch_loss = 0
            for _ in range(int(len(x_train)/batch_size)):
                epoch_x, epoch_y = x_train, y_train
                _, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y, keep_prob: kp})
                epoch_loss += c
        softmaxes_tst = sess.run([softm, pred_class], feed_dict={x: x_test, keep_prob: 1.0})[0]
        incorr = 0
        for i in range(len(softmaxes_tst)):
            curr_sm = softmaxes_tst[i]
            curr_lbl = y_test[i]
            if np.argmax(curr_sm) != np.argmax(curr_lbl):
                incorr += 1
        print('incorr: ', incorr)
        num_ex = len(x_test)
        print('acc = ' + str(num_ex-incorr) + '/' + str(num_ex) + ' = ' + str((num_ex-incorr)/num_ex))
        print('accuracy train:', accuracy.eval({x: x_train, y: y_train, keep_prob: kp}))
        print('accuracy test :', accuracy.eval({x: x_test, y: y_test, keep_prob: 1.0}))
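With that restructuring, the answer to my question would (I think) look like this: restore inside the same session that does the training, after sess.run(init). Again only a sketch, and train_neural_network_v2 is a made-up name:

def train_neural_network_v2(x, layer_sizes, use_stored_weights):
    prediction, saver = neural_network_model_v2(x, layer_sizes)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
    optimizer = tf.train.AdamOptimizer(learning_rate=lrn_rate).minimize(cost)
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)  # initialises everything, including Adam's slot variables
        if use_stored_weights:
            # overwrite the freshly initialised W*/b* with the checkpoint,
            # in THIS session, so training actually starts from them
            saver.restore(sess, fold_path + 'weights/' + 'weights.ckpt')
        else:
            saver.save(sess, fold_path + 'weights/' + 'weights.ckpt')
        # ... training and evaluation as above ...

If I understand correctly, the order matters: sess.run(init) must come first so that the optimizer's slot variables exist (leaving it out is what caused my FailedPreconditionError), and saver.restore then overwrites only the saved weights and biases.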
Finally, this is the code that calls the above functions:
lrn_rate = 0.01
kp = 0.85
epochs = 400
path = '/my/path/to/data/'
fold_path = ''
num_folds = 19
for i in range(num_folds):
    tf.reset_default_graph()
    fold_path = path + 'fold_' + str(i+1) + '/'
    x_train = genfromtxt(fold_path + 'fv_train.csv', delimiter=',')
    y_train = genfromtxt(fold_path + 'lbl_train.csv', delimiter=',')
    x_test = genfromtxt(fold_path + 'fv_test.csv', delimiter=',')
    y_test = genfromtxt(fold_path + 'lbl_test.csv', delimiter=',')
    num_classes = len(y_train[0])
    num_features = len(x_train[0])
    batch_size = len(x_train)
    num_nodes_hl1 = num_features
    num_nodes_hl2 = num_features
    layer_sizes = [num_features, num_nodes_hl1, num_nodes_hl2, num_classes]
    x = tf.placeholder('float', [None, num_features])
    y = tf.placeholder('float')
    keep_prob = tf.placeholder('float')
    use_stored_weights = True
    train_neural_network(x, layer_sizes, use_stored_weights)