I have been working on a TensorFlow model in Python. Everything seemed fine, but when I added an early-stopping routine, memory usage suddenly doubled. It looks as if all variables are being duplicated, including the embedding layer. Below is my code snippet. The batch size is 128, but the embedding is very large, so when it is duplicated it runs out of GPU memory.
with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer(), feed_dict={self.edge_emb_init: self.edge_features})
    sess.run(tf.local_variables_initializer())
    sess.graph.finalize()  # Graph is read-only after this statement.

    for e in range(nepochs):
        self.rng.shuffle(allidx)
        # self.rng.shuffle(allidxu)
        # self.edge_attention()

        epoch_cost = 0
        for batchl in self.fetch_batches(allidx, nbatches, batchsize):
            if verbose:
                epoch_cost += sess.run([train_op, self.cost],
                                       feed_dict={self.X: train_mask[batchl],
                                                  # self.Xu: u_mask[batchu],
                                                  self.Y: Y_train[batchl, :].toarray(),
                                                  self.is_training: False,
                                                  self.get_path: False})[1]
            else:
                sess.run(train_op,
                         feed_dict={self.X: train_mask[batchl],
                                    # self.Xu: u_mask[batchu],
                                    self.Y: Y_train[batchl, :].toarray(),
                                    self.is_training: False,
                                    self.get_path: False})
        epoch_cost /= nbatches

        if verbose:
            cost_only = False
            if cost_only:
                print('[{0:5d}] E={1:.4f}\n'.format(e, epoch_cost))
            else:
                # output after each epoch
                train_acc = sess.run(self.acc,
                                     feed_dict={self.X: train_mask[0:batchsize],
                                                self.Y: Y_train[0:batchsize, :].toarray(),
                                                self.is_training: False,
                                                self.get_path: False})
                print('[{0:5d}] TrainE={1:.4f} TrainAcc={2:.4f}'.format(e, epoch_cost, train_acc))
                # pdb.set_trace()

        if train_ratio < 1:
            val_cost = 0
            for batchl in self.fetch_batches(allidx_val, nbatches_val, batchsize):
                val_cost += sess.run(self.cost,
                                     feed_dict={self.X: val_mask[batchl],
                                                self.Y: Y_val[batchl, :].toarray(),
                                                self.is_training: False,
                                                self.get_path: False})
            val_cost /= nbatches_val
            # pdb.set_trace()
            to_stop = self.early_stop_criteria(val_cost, sess, saver)
            if to_stop == True:
                break
If I exclude the early stopping (i.e., everything from `if train_ratio < 1:` onward), memory utilization stays constant, which is expected since no nodes are added to the graph after it is finalized. Is there a reason TensorFlow would duplicate memory like this, or is it due to my implementation?
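For context, `early_stop_criteria` is not shown above. A minimal sketch of the kind of helper it is, assuming a patience counter plus a checkpoint save through a `tf.train.Saver` created before `sess.graph.finalize()` (the attribute names `best_val_cost`, `patience_counter`, `patience`, and `checkpoint_path` are placeholders, not the real ones), would be roughly:

    # Hypothetical sketch of the early-stopping helper referenced above.
    # Assumes self.best_val_cost, self.patience_counter and self.patience are
    # set up in __init__, and that `saver` is a tf.train.Saver constructed
    # before the graph is finalized.
    def early_stop_criteria(self, val_cost, sess, saver):
        if val_cost < self.best_val_cost:
            self.best_val_cost = val_cost
            self.patience_counter = 0
            # Persist the best weights seen so far.
            saver.save(sess, self.checkpoint_path)  # checkpoint_path is assumed
        else:
            self.patience_counter += 1
        # Tell the training loop to stop once validation stops improving.
        return self.patience_counter >= self.patience

The training loop calls it once per epoch with the averaged validation cost and breaks out of the epoch loop when it returns True.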