我用Tensorflow BERT语言模型创建了一个二进制分类器。这是link。训练模型后,它将保存模型并生成以下文件。
预测代码。
from tensorflow.contrib import predictor
#MODEL_FILE = 'graph.pbtxt'
with tf.Session() as sess:
predict_fn = predictor.from_saved_model(f'/content/drive/My Drive/binary_class/bert/graph.pbtxt')
predictions = predict_fn(pred_sentences)
print(predictions)
错误
OSError: SavedModel file does not exist at: /content/drive/My Drive/binary_class/bert/graph.pbtxt/{saved_model.pbtxt|saved_model.pb}
深入探讨此问题。我遇到了tf.train.Saver()类,用于保存模型。我将估算器火车代码更改为以下代码以保存模型。我提到了这个link。我猜想tensorflow估计量可以和它一起保存。
init_op = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init_op)
# Do some work with the model.
saver = tf.train.Saver()
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
# Save the variables to disk.
save_path = saver.save(sess, f'/content/drive/My Drive/binary_class/bert/tmp/model.ckpt')
这是错误。
Beginning Training!
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-38-0c9f9b70d76b> in <module>()
9 sess.run(init_op)
10 # Do some work with the model.
---> 11 saver = tf.train.Saver()
12 estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
13 # Save the variables to disk.
2 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/training/saver.py in _build(self, checkpoint_path, build_save, build_restore)
860 return
861 else:
--> 862 raise ValueError("No variables to save")
863 self._is_empty = False
864
ValueError: No variables to save
变量,权重在create_model函数中创建。为了保存训练有素的模型,我应该更改什么?
更新: 代码以保存模型。我不确定feature_spec和特征张量。
feature_spec = {'x': tf.VarLenFeature(tf.string)}
def serving_input_receiver_fn():
serialized_tf_example = tf.placeholder(dtype=tf.string,
shape=[1], # batch size
name='input_example_tensor')
receiver_tensors = {'examples': serialized_tf_example}
features = tf.parse_example(serialized_tf_example, feature_spec)
return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)
# Export the estimator
export_path = f'/content/drive/My Drive/binary_class/bert/'
estimator.export_saved_model(
export_path,
serving_input_receiver_fn=serving_input_receiver_fn)
我收到此错误:-
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-55-209298910d1e> in <module>()
16 estimator.export_saved_model(
17 export_path,
---> 18 serving_input_receiver_fn=serving_input_receiver_fn)
4 frames
/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py in export_saved_model(self, export_dir_base, serving_input_receiver_fn, assets_extra, as_text, checkpoint_path, experimental_mode)
730 as_text=as_text,
731 checkpoint_path=checkpoint_path,
--> 732 strip_default_attrs=True)
733
734 def experimental_export_all_saved_models(
/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py in _export_all_saved_models(self, export_dir_base, input_receiver_fn_map, assets_extra, as_text, checkpoint_path, strip_default_attrs)
854 builder, input_receiver_fn_map, checkpoint_path,
855 save_variables, mode=ModeKeys.PREDICT,
--> 856 strip_default_attrs=strip_default_attrs)
857 save_variables = False
858
/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py in _add_meta_graph_for_mode(self, builder, input_receiver_fn_map, checkpoint_path, save_variables, mode, export_tags, check_variables, strip_default_attrs)
927 labels=getattr(input_receiver, 'labels', None),
928 mode=mode,
--> 929 config=self.config)
930
931 export_outputs = export_lib.export_outputs_for_mode(
/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py in _call_model_fn(self, features, labels, mode, config)
1144
1145 logging.info('Calling model_fn.')
-> 1146 model_fn_results = self._model_fn(features=features, **kwargs)
1147 logging.info('Done calling model_fn.')
1148
<ipython-input-17-119a3167bf33> in model_fn(features, labels, mode, params)
5 """The `model_fn` for TPUEstimator."""
6
----> 7 input_ids = features["input_ids"]
8 input_mask = features["input_mask"]
9 segment_ids = features["segment_ids"]
KeyError: 'input_ids'
答案 0 :(得分:1)
笔记本中存在的 create_model 函数带有一些参数。这些功能将传递给模型。
通过将serving_input_fn函数更新为以下内容,该serving函数将按预期工作。
更新代码
def serving_input_receiver_fn():
feature_spec = {
"input_ids" : tf.FixedLenFeature([MAX_SEQ_LENGTH], tf.int64),
"input_mask" : tf.FixedLenFeature([MAX_SEQ_LENGTH], tf.int64),
"segment_ids" : tf.FixedLenFeature([MAX_SEQ_LENGTH], tf.int64),
"label_ids" : tf.FixedLenFeature([], tf.int64)
}
serialized_tf_example = tf.placeholder(dtype=tf.string,
shape=[None],
name='input_example_tensor')
print(serialized_tf_example.shape)
receiver_tensors = {'example': serialized_tf_example}
features = tf.parse_example(serialized_tf_example, feature_spec)
return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)
export_path = '/content/drive/My Drive/binary_class/bert/'
estimator._export_to_tpu = False # this is important
estimator.export_saved_model(export_dir_base=export_path,serving_input_receiver_fn=serving_input_receiver_fn)
答案 1 :(得分:0)
在serving_input_receiver_fn中使用feature_spec
字典对我不起作用。我使用了以下someone else asking the same question中的serving_input_fn
。
要使用加载的估算器:
def serving_input_fn():
label_ids = tf.placeholder(tf.int32, [None], name='label_ids')
input_ids = tf.placeholder(tf.int32, [None, MAX_SEQ_LEN], name='input_ids')
input_mask = tf.placeholder(tf.int32, [None, MAX_SEQ_LEN], name='input_mask')
segment_ids = tf.placeholder(tf.int32, [None, MAX_SEQ_LEN], name='segment_ids')
input_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(
{
'label_ids': label_ids,
'input_ids': input_ids,
'input_mask': input_mask,
'segment_ids': segment_ids
}
)()
return input_fn
export_path = '../testing'
estimator._export_to_tpu = False # this is important
estimator.export_saved_model(export_dir_base=export_path,serving_input_receiver_fn=serving_input_fn)
from tensorflow.contrib import predictor
predict_fn = predictor.from_saved_model('../testing/1589420991')
def predict(sentences, predict_fn):
labels = [0, 1]
input_examples = [
run_classifier.InputExample(
guid="",
text_a = x,
text_b = None,
label = 0
) for x in sentences] # here, "" is just a dummy label
input_features = run_classifier.convert_examples_to_features(
input_examples, labels, MAX_SEQ_LEN, tokenizer
)
all_input_ids = []
all_input_mask = []
all_segment_ids = []
all_label_ids = []
for feature in input_features:
all_input_ids.append(feature.input_ids)
all_input_mask.append(feature.input_mask)
all_segment_ids.append(feature.segment_ids)
all_label_ids.append(feature.label_id)
pred_dict = {
'input_ids': all_input_ids,
'input_mask': all_input_mask,
'segment_ids': all_segment_ids,
'label_ids': all_label_ids
}
predictions = predict_fn(pred_dict)
return [
(sentence, prediction, label)
for sentence, prediction, label in zip(pred_sentences, predictions['probabilities'], predictions['labels'])
]
pred_sentences = [
"That movie was absolutely awful",
"The acting was a bit lacking",
"The film was creative and surprising",
"Absolutely fantastic!",
]
predictions = predict(pred_sentences, predict_fn)
print(predictions)
[('That movie was absolutely awful',
array([-0.26713806, -1.4505868 ], dtype=float32),
0),
('The acting was a bit lacking',
array([-0.23832974, -1.5508994 ], dtype=float32),
0),
('The film was creative and surprising',
array([-0.2784096, -1.4146391], dtype=float32),
0),
('Absolutely fantastic!',
array([-0.29031944, -1.3784236 ], dtype=float32),
0),
('The patient has diabetes',
array([-0.33836085, -1.2480571 ], dtype=float32),
0),
('The patient does not have diabetes',
array([-0.29378486, -1.3682064 ], dtype=float32),
0)]