I am trying to build a model that predicts the next word (URLs, in my case). After following the MNIST example I am stuck at the prediction part. My Python code:
import argparse
import sys
import os
import re

import numpy as np
import pandas
import tensorflow as tf
import url_datasets
from tensorflow.contrib.learn.python.learn.preprocessing import text
from tensorflow.python.framework import dtypes

tf.app.flags.DEFINE_integer('model_version', 1, 'version number of the model.')
tf.app.flags.DEFINE_string('work_dir', '/tmp/suc', 'Working directory.')
FLAGS = tf.app.flags.FLAGS

MAX_DOCUMENT_LENGTH = 40
EMBEDDING_SIZE = 40
n_words = 0
MAX_LABEL = 50
WORDS_FEATURE = 'words'  # Name of the input words feature.
TOKENIZER_RE = re.compile(r'([/a-z_-]*)\s')


def tokenizer(iterator):
  """Tokenizer generator.

  Args:
    iterator: Input iterator with strings.

  Yields:
    array of tokens per each value in the input.
  """
  for value in iterator:
    print(value)
    print(TOKENIZER_RE.findall(value))
    yield TOKENIZER_RE.findall(value)


def estimator_spec_for_softmax_classification(logits, labels, mode):
  """Returns EstimatorSpec instance for softmax classification."""
  predicted_classes = tf.argmax(logits, 1)
  if mode == tf.estimator.ModeKeys.PREDICT:
    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions={
            'class': predicted_classes,
            'prob': tf.nn.softmax(logits)
        })

  onehot_labels = tf.one_hot(labels, MAX_LABEL, 1, 0)
  loss = tf.losses.softmax_cross_entropy(
      onehot_labels=onehot_labels, logits=logits)
  if mode == tf.estimator.ModeKeys.TRAIN:
    optimizer = tf.train.AdamOptimizer(learning_rate=0.1)
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)

  eval_metric_ops = {
      'accuracy': tf.metrics.accuracy(
          labels=labels, predictions=predicted_classes)
  }
  return tf.estimator.EstimatorSpec(
      mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)


def updatePrediction(prediction):
  file = open("/tmp/ai/prediction.txt", "a")
  file.write(str(prediction))
  file.close()


def rnn_model(features, labels, mode):
  word_vectors = tf.contrib.layers.embed_sequence(
      features[WORDS_FEATURE], vocab_size=n_words, embed_dim=EMBEDDING_SIZE)
  word_list = tf.unstack(word_vectors, axis=1)
  cell = tf.contrib.rnn.GRUCell(EMBEDDING_SIZE)
  _, encoding = tf.contrib.rnn.static_rnn(cell, word_list, dtype=tf.float32)
  logits = tf.layers.dense(encoding, MAX_LABEL, activation=None)
  return estimator_spec_for_softmax_classification(
      logits=logits, labels=labels, mode=mode)


def main(_):
  sess = tf.InteractiveSession()
  serialized_tf_example = tf.placeholder(tf.string, name='tf_example')
  global n_words

  urls = url_datasets.load_urls('/tmp/ai/demo')
  x_train = pandas.Series(urls.train.data[:, 1])
  labels = pandas.Series(urls.train.data[:, 0])
  y_train = pandas.Series(urls.train.target)
  x_test = pandas.Series(urls.test.data[:, 1])
  y_test = pandas.Series(urls.test.target)

  vocab_processor = text.VocabularyProcessor(MAX_DOCUMENT_LENGTH,
                                             min_frequency=0,
                                             tokenizer_fn=tokenizer)
  vocab_processor.fit(labels)
  x_transform_train = vocab_processor.fit_transform(x_train)
  x_transform_test = vocab_processor.transform(x_test)
  x_train = np.array(list(x_transform_train))
  x_test = np.array(list(x_transform_test))
  print(vocab_processor.vocabulary_._mapping)
  n_words = len(vocab_processor.vocabulary_)
  vocab_dict = vocab_processor.vocabulary_._mapping

  model_fn = rnn_model
  classifier = tf.estimator.Estimator(model_fn=model_fn)

  train_input_fn = tf.estimator.inputs.numpy_input_fn(
      x={WORDS_FEATURE: x_train},
      y=y_train,
      batch_size=len(x_train),
      num_epochs=None,
      shuffle=False)
  classifier.train(input_fn=train_input_fn, steps=100)

  test_input_fn = tf.estimator.inputs.numpy_input_fn(
      x={WORDS_FEATURE: x_test},
      y=y_test,
      num_epochs=1,
      shuffle=False)
  predictions = classifier.predict(input_fn=test_input_fn)

  export_path_base = sys.argv[-1]
  export_path = os.path.join(
      tf.compat.as_bytes(export_path_base),
      tf.compat.as_bytes(str(FLAGS.model_version)))
  print('Exporting trained model to', export_path)
  builder = tf.saved_model.builder.SavedModelBuilder(export_path)

  y_predicted = np.array(list(p['class'] for p in predictions))
  inverseDictionary = dict(zip(vocab_dict.values(), vocab_dict.keys()))
  for prediction in y_predicted:
    print("prediction:" + inverseDictionary[int(prediction)])
    updatePrediction(prediction)
    print("--> %s" % prediction)

  tensor_info_x = tf.saved_model.utils.build_tensor_info(serialized_tf_example)
  tensor_info_y = tf.saved_model.utils.build_tensor_info(
      tf.convert_to_tensor(y_predicted, tf.float32))
  classification_inputs = tf.saved_model.utils.build_tensor_info(
      serialized_tf_example)
  classification_signature = (
      tf.saved_model.signature_def_utils.build_signature_def(
          inputs={
              tf.saved_model.signature_constants.CLASSIFY_INPUTS:
                  classification_inputs
          },
          outputs={
              tf.saved_model.signature_constants.CLASSIFY_OUTPUT_SCORES:
                  tensor_info_y
          },
          method_name=tf.saved_model.signature_constants.CLASSIFY_METHOD_NAME))
  prediction_signature = (
      tf.saved_model.signature_def_utils.build_signature_def(
          inputs={'x_strings': tensor_info_x},
          outputs={'scores': tensor_info_y},
          method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME))

  legacy_init_op = tf.group(tf.tables_initializer(), name='legacy_init_op')
  builder.add_meta_graph_and_variables(
      sess, [tf.saved_model.tag_constants.SERVING],
      signature_def_map={
          'predict_url':
              prediction_signature,
          tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
              classification_signature,
      },
      legacy_init_op=legacy_init_op)
  builder.save()
  print('Done exporting!')


if __name__ == '__main__':
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--test_with_fake_data',
      default=False,
      help='Test the example code with fake data.',
      action='store_true')
  parser.add_argument(
      '--bow_model',
      default=False,
      help='Run with BOW model instead of RNN.',
      action='store_true')
  tf.app.run()
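One detail about the tokenizer above: TOKENIZER_RE only emits a token when it is followed by whitespace, which is why the client request later in this post sends '/url/a /url/b ' with a trailing space. A quick standalone check:

import re

TOKENIZER_RE = re.compile(r'([/a-z_-]*)\s')

print(TOKENIZER_RE.findall('/url/a /url/b '))  # ['/url/a', '/url/b']
print(TOKENIZER_RE.findall('/url/a /url/b'))   # ['/url/a'] - without a trailing space the last token is lost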

My input data, /tmp/ai/demo/train.csv:
1,/url/a ,/url/a /url/a
2,/url/b ,/url/a /url/c
3,/url/c ,/url/a /url/b
4,/url/d ,/url/b /url/c
5,/url/e ,/url/c /url/d
2,/url/b ,/url/c /url/a
6,/url/f ,/url/d /url/e
6,/url/f ,/url/e /url/g
6,/url/f ,/url/h /url/g
7,/url/g ,/url/e /url/f
7,/url/g ,/url/f /url/h
7,/url/g ,/url/i /url/h
8,/url/h ,/url/f /url/g
9,/url/i ,/url/g /url/h
1,/url/a ,/url/h /url/i

/tmp/ai/demo/test.csv
0,test,/url/b /url/c
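url_datasets is my own module; for readers, here is a minimal sketch of what load_urls is assumed to return (a hypothetical reconstruction, the real loader may differ): target holds column 0 (the class id), data[:, 0] holds column 1 (the next URL, used to fit the vocabulary), and data[:, 1] holds column 2 (the two preceding URLs).

import collections
import numpy as np
import pandas

Dataset = collections.namedtuple('Dataset', ['data', 'target'])
Datasets = collections.namedtuple('Datasets', ['train', 'test'])


def load_urls(directory):
  # Hypothetical loader: column 0 -> target, columns 1 and 2 -> data.
  def _load(path):
    frame = pandas.read_csv(path, header=None)
    return Dataset(data=frame[[1, 2]].values,
                   target=frame[0].values.astype(np.int64))
  return Datasets(train=_load(directory + '/train.csv'),
                  test=_load(directory + '/test.csv'))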

When I build the model and store it, everything works fine and the predictions are correct. But now I want to use a placeholder in place of x_test: x_test = pandas.Series(urls.test.data[:,1])
My client:
import sys
import threading

from grpc.beta import implementations
import tensorflow as tf

from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_service_pb2
from tensorflow.core.framework import types_pb2

tf.app.flags.DEFINE_integer('concurrency', 1,
                            'maximum number of concurrent inference requests')
tf.app.flags.DEFINE_string('server', '', 'PredictionService host:port')
tf.app.flags.DEFINE_string('work_dir', '/tmp', 'Working directory. ')
FLAGS = tf.app.flags.FLAGS


def do_prediction(hostport, work_dir, concurrency):
  host, port = hostport.split(':')
  channel = implementations.insecure_channel(host, int(port))
  stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
  request = predict_pb2.PredictRequest()
  request.model_spec.name = 'predict_url'
  request.model_spec.signature_name = 'predict_url'
  request.inputs['x_strings'].dtype = types_pb2.DT_STRING
  request.inputs['x_strings'].string_val.append('/url/a /url/b ')
  result = stub.Predict(request, 5.0)  # 5 seconds
  return result


def main(_):
  if not FLAGS.server:
    print('please specify server host:port')
    return
  prediction = do_prediction(FLAGS.server, FLAGS.work_dir, FLAGS.concurrency)
  print('\nPrediction from url_classify_client: %s%%' % prediction)


if __name__ == '__main__':
  tf.app.run()
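To read the actual values instead of dumping the whole response proto, the outputs map can be indexed directly; a minimal variant of main() above (a sketch, assuming the 'scores' output name exported earlier):

def main(_):
  if not FLAGS.server:
    print('please specify server host:port')
    return
  result = do_prediction(FLAGS.server, FLAGS.work_dir, FLAGS.concurrency)
  # outputs is a map<string, TensorProto>; float_val holds the flattened values.
  print('scores: %s' % list(result.outputs['scores'].float_val))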

Whenever I replace x_test with the placeholder, the response from the serving request is always the same:
Prediction from url_classify_client: outputs {
  key: "scores"
  value {
    dtype: DT_FLOAT
    tensor_shape {
      dim {
        size: 1
      }
    }
    float_val: 4.0
  }
}
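I suspect the score is constant because tensor_info_y above is built from tf.convert_to_tensor(y_predicted, tf.float32), which bakes the NumPy values into the graph as a Const op at export time, so the signature returns the export-time prediction no matter what the request contains. A minimal sketch reproducing that:

import numpy as np
import tensorflow as tf

y_predicted = np.array([4])  # computed once, at export time
t = tf.convert_to_tensor(y_predicted, tf.float32)
print(t.op.type)  # 'Const' - the values are frozen into the graph

with tf.Session() as sess:
  print(sess.run(t))  # [ 4.] regardless of any input fed to the model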

UPDATE: My updated model export file:
import argparse
import sys
import os
import re

import numpy as np
import pandas
import tensorflow as tf
import url_datasets
from tensorflow.contrib.learn.python.learn.preprocessing import text
from tensorflow.python.framework import dtypes

tf.app.flags.DEFINE_integer('model_version', 1, 'version number of the model.')
tf.app.flags.DEFINE_string('work_dir', '/tmp/suc', 'Working directory.')
FLAGS = tf.app.flags.FLAGS

MAX_DOCUMENT_LENGTH = 40
EMBEDDING_SIZE = 40
n_words = 0
MAX_LABEL = 50
WORDS_FEATURE = 'words'  # Name of the input words feature.
TOKENIZER_RE = re.compile(r'([/a-z_-]*)\s')


def tokenizer(iterator):
  """Tokenizer generator.

  Args:
    iterator: Input iterator with strings.

  Yields:
    array of tokens per each value in the input.
  """
  for value in iterator:
    yield TOKENIZER_RE.findall(value)


def estimator_spec_for_softmax_classification(logits, labels, mode):
  """Returns EstimatorSpec instance for softmax classification."""
  predicted_classes = tf.argmax(logits, 1)
  if mode == tf.estimator.ModeKeys.PREDICT:
    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions={
            'class': predicted_classes,
            'prob': tf.nn.softmax(logits)
        })

  onehot_labels = tf.one_hot(labels, MAX_LABEL, 1, 0)
  loss = tf.losses.softmax_cross_entropy(
      onehot_labels=onehot_labels, logits=logits)
  if mode == tf.estimator.ModeKeys.TRAIN:
    optimizer = tf.train.AdamOptimizer(learning_rate=0.1)
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)

  eval_metric_ops = {
      'accuracy': tf.metrics.accuracy(
          labels=labels, predictions=predicted_classes)
  }
  return tf.estimator.EstimatorSpec(
      mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)


def updatePrediction(prediction):
  file = open("/tmp/ai/prediction.txt", "a")
  file.write(str(prediction))
  file.close()


def customTestFn(input, vocab_processor):
  feature_configs = {'x_strings': tf.FixedLenFeature(shape=[1], dtype=tf.string)}
  tf_example = tf.parse_example(input, feature_configs)
  x_transform_test = vocab_processor.fit_transform(tf_example)
  return np.array(list(x_transform_test))


def rnn_model(features, labels, mode):
  word_vectors = tf.contrib.layers.embed_sequence(
      features[WORDS_FEATURE], vocab_size=n_words, embed_dim=EMBEDDING_SIZE)
  word_list = tf.unstack(word_vectors, axis=1)
  cell = tf.contrib.rnn.GRUCell(EMBEDDING_SIZE)
  _, encoding = tf.contrib.rnn.static_rnn(cell, word_list, dtype=tf.float32)
  logits = tf.layers.dense(encoding, MAX_LABEL, activation=None)
  return estimator_spec_for_softmax_classification(
      logits=logits, labels=labels, mode=mode)


def main(_):
  urls = url_datasets.load_urls('/tmp/ai/demo')
  sess = tf.InteractiveSession()
  serialized_tf_example = tf.placeholder(tf.string, name='x_strings')
  global n_words

  x_train = pandas.Series(urls.train.data[:, 1])
  labels = pandas.Series(urls.train.data[:, 0])
  y_train = pandas.Series(urls.train.target)
  y_test = pandas.Series(urls.test.target)

  vocab_processor = text.VocabularyProcessor(MAX_DOCUMENT_LENGTH,
                                             min_frequency=0,
                                             tokenizer_fn=tokenizer)
  vocab_processor.fit(labels)
  x_transform_train = vocab_processor.fit_transform(x_train)
  x_train = np.array(list(x_transform_train))
  print(vocab_processor.vocabulary_._mapping)
  n_words = len(vocab_processor.vocabulary_)
  vocab_dict = vocab_processor.vocabulary_._mapping

  model_fn = rnn_model
  classifier = tf.estimator.Estimator(model_fn=model_fn)

  train_input_fn = tf.estimator.inputs.numpy_input_fn(
      x={WORDS_FEATURE: x_train},
      y=y_train,
      batch_size=len(x_train),
      num_epochs=None,
      shuffle=False)
  classifier.train(input_fn=train_input_fn, steps=100)

  export_path_base = sys.argv[-1]
  export_path = os.path.join(
      tf.compat.as_bytes(export_path_base),
      tf.compat.as_bytes(str(FLAGS.model_version)))
  print('Exporting trained model to', export_path)
  builder = tf.saved_model.builder.SavedModelBuilder(export_path)
  sess.run(tf.global_variables_initializer())

  test_input_fn = tf.estimator.inputs.numpy_input_fn(
      x={WORDS_FEATURE: customTestFn(serialized_tf_example, vocab_processor)},
      y=y_test,
      num_epochs=1,
      shuffle=False)
  predictions = classifier.predict(input_fn=test_input_fn)
  y_predicted = np.array(list(p['class'] for p in predictions))
  inverseDictionary = dict(zip(vocab_dict.values(), vocab_dict.keys()))
  for prediction in y_predicted:
    print("prediction:" + inverseDictionary[int(prediction)])
    updatePrediction(prediction)
    print("--> %s" % prediction)

  tensor_info_x = tf.saved_model.utils.build_tensor_info(serialized_tf_example)
  tensor_info_y = tf.saved_model.utils.build_tensor_info(
      tf.convert_to_tensor(y_predicted, tf.float32))
  classification_inputs = tf.saved_model.utils.build_tensor_info(
      serialized_tf_example)
  classification_signature = (
      tf.saved_model.signature_def_utils.build_signature_def(
          inputs={
              tf.saved_model.signature_constants.CLASSIFY_INPUTS:
                  classification_inputs
          },
          outputs={
              tf.saved_model.signature_constants.CLASSIFY_OUTPUT_SCORES:
                  tensor_info_y
          },
          method_name=tf.saved_model.signature_constants.CLASSIFY_METHOD_NAME))
  prediction_signature = (
      tf.saved_model.signature_def_utils.build_signature_def(
          inputs={'x_strings': tensor_info_x},
          outputs={'scores': tensor_info_y},
          method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME))

  legacy_init_op = tf.group(tf.tables_initializer(), name='legacy_init_op')
  builder.add_meta_graph_and_variables(
      sess, [tf.saved_model.tag_constants.SERVING],
      signature_def_map={
          'predict_url':
              prediction_signature,
          tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
              classification_signature,
      },
      legacy_init_op=legacy_init_op)
  builder.save()
  print('Done exporting!')


if __name__ == '__main__':
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--test_with_fake_data',
      default=False,
      help='Test the example code with fake data.',
      action='store_true')
  parser.add_argument(
      '--bow_model',
      default=False,
      help='Run with BOW model instead of RNN.',
      action='store_true')
  tf.app.run()
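What customTestFn attempts, turning the request string into word ids, would have to be expressed as graph ops to run at serving time; VocabularyProcessor is plain Python and executes only during export. A hedged sketch of the kind of in-graph lookup this would need (assumes TF 1.4's tf.contrib.lookup and a 1-D string input; untested):

# Sketch only: in-graph tokenization and vocabulary lookup, so that raw
# strings fed to the 'x_strings' placeholder are transformed at serving time.
# vocab_words is assumed to be the vocabulary learned above, in id order.
vocab_words = [w for w, i in sorted(vocab_dict.items(), key=lambda kv: kv[1])]
table = tf.contrib.lookup.index_table_from_tensor(
    mapping=tf.constant(vocab_words), default_value=0)
tokens = tf.string_split(serialized_tf_example, delimiter=' ')
word_ids = table.lookup(tokens.values)  # would feed the embedding lookup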
Whenever I run the gRPC client I get the same reply:
Prediction from url_classify_client: outputs {
  key: "scores"
  value {
    dtype: DT_FLOAT
    tensor_shape {
      dim {
        size: 1
      }
    }
    float_val: 7.0
  }
}
This is exactly the same prediction as during the model export, so I believe "y_predicted" is never evaluated during serving...
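For comparison, the stock Estimator export path keeps the model graph live at serving time instead of freezing a constant. A sketch of roughly what that would look like here (not wired in; the feature-spec shape is my assumption):

# Sketch: export through the Estimator so that PREDICT actually runs per
# request. Assumes the request carries already-vectorized word ids of length
# MAX_DOCUMENT_LENGTH; string-to-id lookup would then have to live in the
# graph or in the client.
feature_spec = {
    WORDS_FEATURE: tf.FixedLenFeature(shape=[MAX_DOCUMENT_LENGTH],
                                      dtype=tf.int64)
}
serving_input_receiver_fn = (
    tf.estimator.export.build_parsing_serving_input_receiver_fn(feature_spec))
classifier.export_savedmodel(export_path_base, serving_input_receiver_fn)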
I am not sure how to debug this during serving (I run it with Bazel). After setting:
export TF_CPP_MIN_VLOG_LEVEL=0
export GRPC_VERBOSITY=DEBUG
export GRPC_TRACE=all
I get the following message in the log (when I execute the gRPC request):
'PRI * HTTP/2.0....SM......$..................................@................@.:scheme.http@.:method.POST..:path-/tensorflow.serving.PredictionService/Predict@.:authority.localhost:9000@.te.trailers@.content-type.application/grpc@.user-agent8grpc-python/1.4.0 grpc-c/4.0.0 (osx; chttp2; gregarious)@.grpc-accept-encoding.identity,deflate,gzip..grpc-timeout.5S...............B..........=....predict_url..predict_url....x_strings....B./url/a /url/b ..........................'
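One way to check what actually got exported, independent of the server, is TensorFlow's saved_model_cli (shipped since TF 1.3); if the 'scores' output shows up with no dependency on the x_strings input, that would confirm the suspicion above:

saved_model_cli show --dir /tmp/saved_rnn/1 --all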
My export command:
python tensorflow/tensorflow/examples/learn/saved_simple_url_classification.py /tmp/saved_rnn
My serve command:
bazel-bin/tensorflow_serving/model_servers/tensorflow_model_server --port=9000 --model_name=predict_url --model_base_path=/tmp/saved_rnn/ --logtostderr --logdir logs &> grpc_log
My gRPC command:
python url_classify_client.py --server=localhost:9000