我正在运行一个函数'train',并尝试用下面的代码保存我的模型:
save_path = saver.save(sess, model_path, global_step=1000)
print("Model saved in file: %s" % save_path)
其中model_path是传递给函数'train'的参数(以及定义我的神经网络的其他几个参数)
model_path="Models/LeNet"
我想保存不同参数集的模型以进行比较。因此,当我在循环中运行函数时,有时它能正常工作,但有时我会得到以下错误
FailedPreconditionError (see above for traceback): Failed to rename: Models/LeNet-1000.data-00000-of-00001.tempstate4653798046453900544 to: Models/LeNet-1000.data-00000-of-00001 : The process cannot access the file because it is being used by another process.
[[Node: save/SaveV2 = SaveV2[dtypes=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/cpu:0"](_recv_save/Const_0, save/SaveV2/tensor_names, save/SaveV2/shape_and_slices, Model/b1, Model/b1/Adam, Model/b1/Adam_1, Model/b2, Model/b2/Adam, Model/b2/Adam_1, Model/b3, Model/b3/Adam, Model/b3/Adam_1, Model/b4, Model/b4/Adam, Model/b4/Adam_1, Model/b5, Model/b5/Adam, Model/b5/Adam_1, Model/w1, Model/w1/Adam, Model/w1/Adam_1, Model/w2, Model/w2/Adam, Model/w2/Adam_1, Model/w3, Model/w3/Adam, Model/w3/Adam_1, Model/w4, Model/w4/Adam, Model/w4/Adam_1, Model/w5, Model/w5/Adam, Model/w5/Adam_1, SGD/beta1_power, SGD/beta2_power)]]
我想知道为什么它会创建一个扩展名为"tempstate4653798046453900544"的文件,以及为什么它说"进程无法访问该文件,因为它正被另一个进程使用"——即使我已经尝试关闭计算机中除 jupyter 笔记本之外的所有进程。请注意,此错误与问题"Tensorflow FailedPreconditionError, but all variables are initialized"不同。
我在这里附上我的 train(训练)函数:
def train(learning_rate, training_epochs, batch_size, display_step, alg, actFun, logs_path, model_path):
    """Build, train, and checkpoint a LeNet-5 classifier on MNIST.

    Args:
        learning_rate: step size passed to the optimizer constructor ``alg``.
        training_epochs: number of full passes over the training set.
        batch_size: mini-batch size used by ``mnist.train.next_batch``.
        display_step: print a progress line every ``display_step`` epochs.
        alg: optimizer class, e.g. ``tf.train.AdamOptimizer`` — called as
            ``alg(learning_rate)``.
        actFun: activation function forwarded to ``LeNet5_Model``.
        logs_path: directory prefix for TensorBoard summary writers.
        model_path: checkpoint path prefix handed to ``tf.train.Saver.save``.

    Side effects:
        Writes train/validation summaries under ``logs_path`` and a
        checkpoint at ``model_path``-1000. Relies on module-level names
        ``mnist``, ``X_validation``, ``y_validation`` and ``LeNet5_Model``.
    """
    # Start from a clean graph so repeated calls in a loop (typical in a
    # notebook) do not pile duplicate variables/ops onto the default graph.
    tf.reset_default_graph()

    # tf Graph input: MNIST images flattened to 28*28 = 784 vectors.
    x = tf.placeholder(tf.float32, [None, 784], name='InputData')
    # One-hot labels for the 10 digit classes.
    y = tf.placeholder(tf.float32, [None, 10], name='LabelData')

    with tf.name_scope('Model'):
        # Reshape the flat input back into 28x28 single-channel images.
        xin = tf.reshape(x, [-1, 28, 28, 1])
        pred = LeNet5_Model(xin, actFun)
    with tf.name_scope('Loss'):
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=pred))
        print('cost', cost)
    with tf.name_scope('SGD'):
        optimizer = alg(learning_rate).minimize(cost)
    with tf.name_scope('Accuracy'):
        acc = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
        acc = tf.reduce_mean(tf.cast(acc, tf.float32))

    init = tf.global_variables_initializer()
    tf.summary.scalar("Loss", cost)
    tf.summary.scalar("Accuracy", acc)
    merged_summary_op = tf.summary.merge_all()
    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(init)
        train_writer = tf.summary.FileWriter(
            logs_path + '/train', graph=tf.get_default_graph())
        validation_writer = tf.summary.FileWriter(
            logs_path + '/validation', graph=tf.get_default_graph())
        try:
            for epoch in range(training_epochs):
                avg_cost = 0.
                total_batch = int(mnist.train.num_examples / batch_size)
                for i in range(total_batch):
                    batch_xs, batch_ys = mnist.train.next_batch(
                        batch_size, shuffle=True)
                    _, c, acc_train, summary = sess.run(
                        [optimizer, cost, acc, merged_summary_op],
                        feed_dict={x: batch_xs, y: batch_ys})
                    train_writer.add_summary(summary, epoch * total_batch + i)
                    avg_cost += c / total_batch
                accValid, summary_validation = sess.run(
                    [acc, merged_summary_op],
                    feed_dict={x: X_validation, y: y_validation})
                validation_writer.add_summary(summary_validation, epoch)
                if (epoch + 1) % display_step == 0:
                    print("Epoch: ", '%02d' % (epoch + 1),
                          " ====> Loss=", "{:.6f}".format(avg_cost),
                          "Training Accuracy=",
                          "{:.6f}".format(acc_train * 100), "%",
                          " Validation Accuracy=",
                          "{:.6f}".format(accValid * 100), "%")
            print("Optimization Finished!")
        finally:
            # BUG FIX: the FileWriters were never closed, leaking open file
            # handles on the output directory. On Windows those stale handles
            # are what make Saver's rename of its ".tempstate..." file fail
            # with "The process cannot access the file because it is being
            # used by another process" on later runs of this function.
            train_writer.close()
            validation_writer.close()
        save_path = saver.save(sess, model_path, global_step=1000)
        print("Model saved in file: %s" % save_path)