Question

I've trained my NN in Tensorflow and saved the model like this:

def neural_net(x):
    """Build a small fully connected network on top of ``x``.

    A hidden dense layer of 195 units with a sigmoid activation feeds a
    dense output layer of 6 units that has no activation.

    Returns the tensor produced by the output layer.
    """
    hidden = tf.layers.dense(inputs=x, units=195, activation=tf.nn.sigmoid)
    return tf.layers.dense(inputs=hidden, units=6)

# Load inputs and targets from space-separated CSV files.
train_x = pd.read_csv("data_x.csv", sep=" ")
train_y = pd.read_csv("data_y.csv", sep=" ")
# Rescale the inputs (divide by 6, shift by -0.5). The same transform has to
# be applied to any data fed to the model later on.
train_x = train_x / 6 - 0.5

# The first 90% of the rows are used for training; the rest is held out.
train_size = 0.9
train_cnt = int(floor(train_x.shape[0] * train_size))
x_train = train_x.iloc[0:train_cnt].values
y_train = train_y.iloc[0:train_cnt].values
x_test = train_x.iloc[train_cnt:].values
y_test = train_y.iloc[train_cnt:].values

# Graph inputs: 386 features per row, 6 target values per row.
x = tf.placeholder("float", [None, 386])
y = tf.placeholder("float", [None, 6])

nn_output = neural_net(x)

cost = tf.reduce_mean(tf.losses.mean_squared_error(labels=y, predictions=nn_output))
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)

training_epochs = 5000
display_step = 1000
batch_size = 30

# NOTE: nothing in the graph built above consumes keep_prob; it is only fed
# during training and has no effect on the result.
keep_prob = tf.placeholder("float")

# The split into batches is the same for every epoch, so compute it once.
# max(1, ...) keeps np.array_split valid when there are fewer rows than
# batch_size (zero sections would raise).
total_batch = max(1, len(x_train) // batch_size)
x_batches = np.array_split(x_train, total_batch)
y_batches = np.array_split(y_train, total_batch)

# The Saver must be created after the network so it has variables to track.
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(training_epochs):
        for batch_x, batch_y in zip(x_batches, y_batches):
            _, c = sess.run([optimizer, cost],
                            feed_dict={
                                x: batch_x,
                                y: batch_y,
                                keep_prob: 0.8
                            })
    # global_step only becomes the filename suffix: trained_model-1000.*
    saver.save(sess, 'trained_model', global_step=1000)

Now I want to use the trained model in a different file. Of course there are many many examples of restoring and saving the model, I went through lots of them. Still I couldn't make any of them work, there is always some kind of error. So this is my restore file, could you please help me to make it restore the saved model?

# Restore attempt from the question (reported to fail with "The session graph
# is empty").
# Import the graph definition from the .meta file; the returned object is
# used below to restore the variable values.
saver = tf.train.import_meta_graph('trained_model-1000.meta')
# NOTE(review): y_pred is a plain empty list, not a tensor taken from the
# imported graph, so there is nothing for sess.run to evaluate.
y_pred = []
with tf.Session() as sess:
    # Load variable values from the most recent checkpoint in the current directory.
    saver.restore(sess, tf.train.latest_checkpoint('./'))
    # NOTE(review): neither `x` nor `input_values` is defined in this snippet;
    # presumably they must be looked up in the imported graph / supplied by the caller.
    sess.run([y_pred], feed_dict={x: input_values})

E.g. this attempt gave me the error "The session graph is empty. Add operations to the graph before calling run()." So what operation should I add to the graph and how? I don't know what that operation should be in my model... I don't understand this whole concept of saving/restoring in Tensorflow. Or should I do the restoring completely differently? Thanks in advance!

Answer 1

如果我说错了还请见谅：tf.train.Saver() 只保存变量的值，而不保存计算图本身。这意味着，如果要在另一个文件中加载模型，就需要重新构建计算图，或者以某种方式加载计算图。TensorFlow 文档中写道：

tf.train.Saver对象不仅将变量保存到检查点文件，还恢复变量。请注意，从文件还原变量时，您不必事先初始化它们。

考虑以下示例：

保存模型的一个文件：

# Two variables, both starting out as all zeros.
v1 = tf.get_variable("v1", shape=[3], initializer=tf.zeros_initializer)
v2 = tf.get_variable("v2", shape=[5], initializer=tf.zeros_initializer)

# Ops that add one to v1 and subtract one from v2.
inc_v1 = v1.assign(v1 + 1)
dec_v2 = v2.assign(v2 - 1)

# Initialiser op for every variable defined above.
init_op = tf.global_variables_initializer()

# Saver that handles saving and restoring all the variables.
saver = tf.train.Saver()

# Run the model: initialise, modify the variables, then write them to disk.
with tf.Session() as sess:
    sess.run(init_op)
    # Do some work with the model.
    sess.run(inc_v1.op)
    sess.run(dec_v2.op)
    # Persist the current variable values.
    save_path = saver.save(sess, "/tmp/model.ckpt")
    print("Model saved in file: %s" % save_path)

加载以前保存的模型的另一个文件：

# Start from an empty default graph.
tf.reset_default_graph()

# Re-declare the variables with the same names and shapes; no initializer is
# given because the values come from the checkpoint.
v1 = tf.get_variable("v1", shape=[3])
v2 = tf.get_variable("v2", shape=[5])

# Saver that handles saving and restoring all the variables.
saver = tf.train.Saver()

# Restore the saved values inside a session and print them.
with tf.Session() as sess:
    # Load the variable values from disk.
    saver.restore(sess, "/tmp/model.ckpt")
    print("Model restored.")
    # Show what was restored.
    print("v1 : %s" % sess.run(v1))
    print("v2 : %s" % sess.run(v2))

Answer 2

 # Evaluate the network output tensor for one batch of inputs; keep_prob is
 # fed alongside the inputs.
 output = sess.run(nn_output, feed_dict={ x: batch_x, keep_prob: 0.8 })

其中 nn_output 是网络最后一层输出张量的名称。您可以使用以下方法保存变量：

# Saver takes variables, not output tensors; with no argument it saves every
# variable in the graph.
saver = tf.train.Saver()
# global_step is appended to the file name: my_test_model-1000.*
saver.save(sess, 'my_test_model', global_step=1000)

因此在你的代码中：

out_layer = tf.layers.dense(inputs=layer_1, units=6)

应该是：

out_layer = tf.layers.dense(inputs=layer_1, units=6, name='nn_output')

恢复：

with tf.Session() as sess:
    # import_meta_graph expects the .meta file written by
    # saver.save(sess, 'my_test_model', global_step=1000).
    saver = tf.train.import_meta_graph('my_test_model-1000.meta')
    # Load the variable values from the latest checkpoint in the current directory.
    saver.restore(sess, tf.train.latest_checkpoint('./'))

现在您应该可以访问计算图中的该节点了。如果没有指定名称，就很难恢复该特定的层。

Answer 3

您可以使用 tf.saved_model.builder.SavedModelBuilder 功能。

保存的主要内容：

builder = tf.saved_model.builder.SavedModelBuilder(graph_location)

builder.add_meta_graph_and_variables(sess, ["cnn_mnist"])

builder.save()

保存模型的代码：

...
def main(_):
  """Train the MNIST classifier, then export it as a SavedModel and a checkpoint.

  The single positional argument (the argv list handed over by tf.app.run)
  is not used. The data directory is read from the module-level FLAGS.
  """
  # Import data
  mnist = input_data.read_data_sets(FLAGS.data_dir)

  # Input placeholder: 784 values per example
  x = tf.placeholder(tf.float32, [None, 784])

  # Label placeholder: one integer class id per example
  y_ = tf.placeholder(tf.int64, [None])

  # Build the graph for the deep net (deepnn is defined elsewhere)
  y_conv, keep_prob = deepnn(x)

  with tf.name_scope('loss'):
    cross_entropy = tf.losses.sparse_softmax_cross_entropy(
        labels=y_, logits=y_conv)
  cross_entropy = tf.reduce_mean(cross_entropy)

  with tf.name_scope('adam_optimizer'):
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

  with tf.name_scope('accuracy'):
    correct_prediction = tf.equal(tf.argmax(y_conv, 1), y_)
    correct_prediction = tf.cast(correct_prediction, tf.float32)
  accuracy = tf.reduce_mean(correct_prediction)

  graph_location ="tmp/"
  print('Saving graph to: %s' % graph_location)
  # The builder is created before anything else writes into graph_location.
  builder = tf.saved_model.builder.SavedModelBuilder(graph_location)

  # Write the graph definition for TensorBoard, then release the writer.
  train_writer = tf.summary.FileWriter(graph_location)
  train_writer.add_graph(tf.get_default_graph())
  train_writer.close()

  saver = tf.train.Saver(max_to_keep=1)

  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(20000):
      batch = mnist.train.next_batch(50)
      if i % 100 == 0:
        train_accuracy = accuracy.eval(feed_dict={
            x: batch[0], y_: batch[1], keep_prob: 1.0})
        print('step %d, training accuracy %g' % (i, train_accuracy))
      train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

    print('test accuracy %g' % accuracy.eval(feed_dict={
        x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))

    # Variables are captured when add_meta_graph_and_variables is called, so
    # it has to run after training for the export to hold the trained weights.
    builder.add_meta_graph_and_variables(sess, ["cnn_mnist"])
    builder.save()
    saver.save(sess, "tmp/my_checkpoint.ckpt", global_step =0)

# Script entry point: parse --data_dir, then hand control to tf.app.run.
if __name__ == '__main__':
  parser = argparse.ArgumentParser()
  parser.add_argument('--data_dir', type=str,
                      default='/tmp/tensorflow/mnist/input_data',
                      help='Directory for storing input data')
  # parse_known_args returns the recognised flags plus the leftover arguments;
  # FLAGS becomes the module-level name that main() reads.
  FLAGS, unparsed = parser.parse_known_args()
  # Pass the program name and the unrecognised arguments on via tf.app.run.
  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)

恢复模型的代码：

import tensorflow as tf

# Restore the weights

export_dir = "tmp"
# The context manager closes the session once the variables have been listed.
with tf.Session() as sess:
    # Load the graph and variables exported under the "cnn_mnist" tag.
    tf.saved_model.loader.load(sess, ["cnn_mnist"], export_dir)

    # Every variable in the restored graph; use tf.trainable_variables()
    # instead to list only the trainable ones.
    global_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    for var in global_vars:
        print(var.name)

Answer 4

这个问题很旧。但是，如果其他人正在努力使用经过训练的模型（使用TF 1.x）进行预测，则此代码可能会有所帮助。

请注意

必须在创建 Saver() 实例之前执行网络的构造/定义代码，否则会收到错误：ValueError: No variables to save。在下面的代码中，LeNet(x) 方法为输入占位符 x 构建网络。
您不应在会话中初始化变量，因为变量显然是从已保存的模型中加载的。


# All the network construction code (e.g. defining the variables and
# layers) must be executed before the Saver() object is created.
# Otherwise you get the error: ValueError: No variables to save.

logits = LeNet(x)
saver = tf.train.Saver()

# randrange excludes the upper bound, so the index is always valid
# (randint(0, len(X_train)) could return len(X_train) and raise IndexError).
index = random.randrange(len(X_train))
image = X_train[index].squeeze()
label = y_train[index]
print("Label: ", label)

plt.figure(figsize=(1,1))
plt.imshow(image, cmap="gray")
plt.show()

with tf.Session() as sess:
    # No variable initialisation here: the values come from the checkpoint.
    saver.restore(sess, tf.train.latest_checkpoint('./checkpoints/'))
    logits_output = sess.run(logits, feed_dict={x: image.reshape((1, 32, 32, 1))})
    logits_output = logits_output.squeeze()
    # Softmax; subtracting the maximum first avoids overflow in np.exp
    # without changing the result.
    exp_logits = np.exp(logits_output - np.max(logits_output))
    pred_output = exp_logits / np.sum(exp_logits)
    print("Logits: ", logits_output)
    print("Prediction output:", pred_output)
    print("Predicted Label: ", np.argmax(pred_output))

How to predict values with a trained Tensorflow model

4 个答案: