在 TensorFlow 中进行 RNN 文本分类、预测和模型服务(TensorFlow Serving)

时间:2018-03-08 11:12:16

标签: python tensorflow text-classification rnn tensorflow-serving

我尝试构建一个预测下一个单词(在我的例子中是 URL)的模型。在参照 mnist 示例之后,我卡在了预测部分。我的 Python 代码:



import argparse
import sys
import os

import re
import numpy as np
import pandas
import tensorflow as tf
import url_datasets
from tensorflow.contrib.learn.python.learn.preprocessing import text
from tensorflow.python.framework import dtypes

# Command-line flags. `model_version` becomes the last path component of the
# SavedModel export directory built in main(); `work_dir` is declared but not
# read anywhere in the visible part of this script.
tf.app.flags.DEFINE_integer('model_version', 1, 'version number of the model.')
tf.app.flags.DEFINE_string('work_dir', '/tmp/suc', 'Working directory.')
FLAGS = tf.app.flags.FLAGS

# Length passed to VocabularyProcessor as the maximum document length.
MAX_DOCUMENT_LENGTH = 40
# Used both as the embedding dimension and as the GRU cell size in rnn_model().
EMBEDDING_SIZE = 40
# Vocabulary size; main() overwrites this global before the model is built.
n_words = 0
# Depth of the one-hot labels and number of units of the output dense layer.
MAX_LABEL = 50
WORDS_FEATURE = 'words'  # Name of the input words feature.
# A token is a (possibly empty) run of '/', 'a'-'z', '_' or '-' that is
# immediately followed by one whitespace character.
# NOTE(review): because the trailing whitespace is mandatory, the last token
# of a string that does not end in whitespace is not matched -- confirm that
# this is intended for inputs such as '/url/a /url/b'.
TOKENIZER_RE = re.compile(r'([/a-z_-]*)\s')

def tokenizer(iterator):
  """Lazily tokenize every string produced by `iterator`.

  Each input string is echoed to stdout, followed by the tokens that
  TOKENIZER_RE extracted from it, before those tokens are yielded.

  Args:
    iterator: Iterable of strings to split.

  Yields:
    One list of token strings per input string.
  """
  for text_value in iterator:
    found = TOKENIZER_RE.findall(text_value)
    # Debug trace: the raw input first, then what the regex extracted.
    print(text_value)
    print(found)
    yield found

def estimator_spec_for_softmax_classification(
    logits, labels, mode):
  """Build the EstimatorSpec of a softmax classifier for the given mode.

  Args:
    logits: Unnormalised class scores; argmax is taken over axis 1.
    labels: Class ids; one-hot encoded with depth MAX_LABEL for the loss.
      Not read in PREDICT mode.
    mode: One of the tf.estimator.ModeKeys values.

  Returns:
    A tf.estimator.EstimatorSpec carrying
      * PREDICT: predictions 'class' (argmax) and 'prob' (softmax),
      * TRAIN: the cross-entropy loss and an Adam training op,
      * otherwise: the loss and an 'accuracy' evaluation metric.
  """
  modes = tf.estimator.ModeKeys
  class_ids = tf.argmax(logits, 1)

  if mode == modes.PREDICT:
    outputs = {
        'class': class_ids,
        'prob': tf.nn.softmax(logits)
    }
    return tf.estimator.EstimatorSpec(mode=mode, predictions=outputs)

  # Both TRAIN and EVAL need the cross-entropy against one-hot labels.
  xent = tf.losses.softmax_cross_entropy(
      onehot_labels=tf.one_hot(labels, MAX_LABEL, 1, 0), logits=logits)

  if mode == modes.TRAIN:
    minimize_op = tf.train.AdamOptimizer(learning_rate=0.1).minimize(
        xent, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode, loss=xent, train_op=minimize_op)

  metrics = {
      'accuracy': tf.metrics.accuracy(labels=labels, predictions=class_ids)
  }
  return tf.estimator.EstimatorSpec(
      mode=mode, loss=xent, eval_metric_ops=metrics)

def updatePrediction(prediction):
  """Append the string form of `prediction` to /tmp/ai/prediction.txt.

  The file is opened in append mode, so earlier content is kept and no
  separator is written between successive predictions.

  Args:
    prediction: Any object; `str(prediction)` is what gets written.
  """
  # The context manager closes the handle even when write() raises, which the
  # explicit open()/close() pair did not guarantee.
  with open("/tmp/ai/prediction.txt", "a") as prediction_log:
    prediction_log.write(str(prediction))

def rnn_model(features, labels, mode):
  """Estimator model_fn: embedding -> GRU -> dense softmax classifier.

  Args:
    features: Dict holding the token ids under WORDS_FEATURE (assumed to be
      laid out as (batch, sequence) -- TODO confirm against the input_fn).
    labels: Class ids, forwarded unchanged to the spec builder.
    mode: A tf.estimator.ModeKeys value.

  Returns:
    The EstimatorSpec produced by estimator_spec_for_softmax_classification.
  """
  token_ids = features[WORDS_FEATURE]
  embedded = tf.contrib.layers.embed_sequence(
      token_ids, vocab_size=n_words, embed_dim=EMBEDDING_SIZE)

  # static_rnn wants a Python list with one tensor per step, so split axis 1.
  steps = tf.unstack(embedded, axis=1)
  gru = tf.contrib.rnn.GRUCell(EMBEDDING_SIZE)

  # Only the final state is used; the per-step outputs are discarded.
  final_state = tf.contrib.rnn.static_rnn(gru, steps, dtype=tf.float32)[1]

  return estimator_spec_for_softmax_classification(
      logits=tf.layers.dense(final_state, MAX_LABEL, activation=None),
      labels=labels,
      mode=mode)

def main(_):
  """Train the URL classifier, predict on the test set and export a SavedModel.

  Steps: load the data from /tmp/ai/demo, build the vocabulary, train the
  Estimator for 100 steps, predict on the test rows, then write a SavedModel
  under <last command-line argument>/<FLAGS.model_version> with a
  'predict_url' signature and a default classification signature.

  NOTE(review): the signature outputs are built from `y_predicted`, a NumPy
  array computed here at export time, and the input placeholder is not
  consumed by any model op in this function. The exported graph therefore
  looks like it can only return the export-time predictions, whatever the
  request contains. Exporting through the Estimator with a
  serving_input_receiver_fn is the usual way to get a servable graph --
  confirm.

  Args:
    _: Unused argv passed by tf.app.run().
  """
  # This session is the one handed to the SavedModelBuilder at the end.
  sess = tf.InteractiveSession()
  # Only referenced by the build_tensor_info() calls further down.
  serialized_tf_example = tf.placeholder(tf.string, name='tf_example')

  global n_words
  urls = url_datasets.load_urls('/tmp/ai/demo')
  # presumably column 1 holds the input URL sequence and column 0 the label
  # URL -- verify against url_datasets.load_urls.
  x_train = pandas.Series(urls.train.data[:,1])
  labels = pandas.Series(urls.train.data[:,0])
  y_train = pandas.Series(urls.train.target)
  x_test = pandas.Series(urls.test.data[:,1])
  y_test = pandas.Series(urls.test.target)
  vocab_processor = text.VocabularyProcessor(MAX_DOCUMENT_LENGTH,
               min_frequency=0,
               tokenizer_fn=tokenizer)
  # The vocabulary is fitted on the label column first, then on the inputs.
  # NOTE(review): VocabularyProcessor.fit appears to freeze the vocabulary,
  # in which case the fit_transform below would add no new tokens -- confirm.
  vocab_processor.fit(labels)
  x_transform_train = vocab_processor.fit_transform(x_train)
  x_transform_test = vocab_processor.transform(x_test)
  x_train = np.array(list(x_transform_train))
  x_test = np.array(list(x_transform_test))

  print(vocab_processor.vocabulary_._mapping)

  # rnn_model() reads this global as the embedding vocabulary size.
  n_words = len(vocab_processor.vocabulary_)
  vocab_dict = vocab_processor.vocabulary_._mapping
  model_fn = rnn_model

  classifier = tf.estimator.Estimator(model_fn=model_fn)

  # Full-batch training: every step sees the whole training set, in order.
  train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={WORDS_FEATURE: x_train},
        y=y_train,
        batch_size=len(x_train),
        num_epochs=None,
        shuffle=False)
  classifier.train(input_fn=train_input_fn, steps=100)


  test_input_fn = tf.estimator.inputs.numpy_input_fn(
      x={WORDS_FEATURE: x_test},
      y=y_test,
      num_epochs=1,
      shuffle=False)
  predictions = classifier.predict(input_fn=test_input_fn)

  # The export base directory is taken from the last command-line argument.
  export_path_base = sys.argv[-1]
  export_path = os.path.join(
      tf.compat.as_bytes(export_path_base),
      tf.compat.as_bytes(str(FLAGS.model_version)))
  print('Exporting trained model to', export_path)
  builder = tf.saved_model.builder.SavedModelBuilder(export_path)
  # Consuming the generator runs the prediction; the result is a NumPy array.
  y_predicted = np.array(list(p['class'] for p in predictions))
  # id -> token lookup, used to print the predicted class as a URL string.
  inverseDictionary = dict(zip(vocab_dict.values(), vocab_dict.keys()))
  for prediction in y_predicted:
       print("prediction:"+inverseDictionary[int(prediction)])
       updatePrediction(prediction)
       print("--> %s" % prediction)


  tensor_info_x = tf.saved_model.utils.build_tensor_info(serialized_tf_example)
  # NOTE(review): convert_to_tensor on a NumPy array yields a constant, so
  # this output does not depend on `serialized_tf_example`.
  tensor_info_y = tf.saved_model.utils.build_tensor_info(tf.convert_to_tensor(y_predicted, tf.float32))

  # Same placeholder as tensor_info_x, wrapped a second time.
  classification_inputs = tf.saved_model.utils.build_tensor_info(
      serialized_tf_example)

  classification_signature = (
      tf.saved_model.signature_def_utils.build_signature_def(
          inputs={
              tf.saved_model.signature_constants.CLASSIFY_INPUTS:
                 classification_inputs
          },
          outputs={
              tf.saved_model.signature_constants.CLASSIFY_OUTPUT_SCORES:
                 tensor_info_y
          },
          method_name=tf.saved_model.signature_constants.CLASSIFY_METHOD_NAME))

  # Signature addressed by the client: input 'x_strings', output 'scores'.
  prediction_signature = (
      tf.saved_model.signature_def_utils.build_signature_def(
          inputs={'x_strings': tensor_info_x},
          outputs={'scores': tensor_info_y},
          method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME))

  legacy_init_op = tf.group(tf.tables_initializer(), name='legacy_init_op')
  builder.add_meta_graph_and_variables(
      sess, [tf.saved_model.tag_constants.SERVING],
      signature_def_map={
          'predict_url':
              prediction_signature,
          tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
              classification_signature,
      },
      legacy_init_op=legacy_init_op)
  builder.save()

  print('Done exporting!')

if __name__ == '__main__':
  # The options of this script are declared with tf.app.flags at module level;
  # tf.app.run() parses the command line and then invokes main(). No argparse
  # parser is built here because nothing ever called parse_args() on one, so
  # its --test_with_fake_data / --bow_model options could not take effect.
  tf.app.run()




我的输入数据/tmp/ai/demo/train.csv



1,/url/a ,/url/a /url/a
2,/url/b ,/url/a /url/c
3,/url/c ,/url/a /url/b
4,/url/d ,/url/b /url/c
5,/url/e ,/url/c /url/d
2,/url/b ,/url/c /url/a
6,/url/f ,/url/d /url/e
6,/url/f ,/url/e /url/g
6,/url/f ,/url/h /url/g
7,/url/g ,/url/e /url/f
7,/url/g ,/url/f /url/h
7,/url/g ,/url/i /url/h
8,/url/h ,/url/f /url/g
9,/url/i ,/url/g /url/h
1,/url/a ,/url/h /url/i




/tmp/ai/demo/test.csv



0,test,/url/b /url/c




当我构建模型并保存它时,一切正常,预测结果也是正确的。但现在我想用占位符(placeholder)来代替 x_test,也就是代替这一行:x_test = pandas.Series(urls.test.data[:,1])

我的客户端代码:



import sys
import threading
from grpc.beta import implementations
import tensorflow as tf
from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_service_pb2
from tensorflow.core.framework import types_pb2

# Command-line flags of the client. `server` must be given as host:port;
# `concurrency` and `work_dir` are forwarded to do_prediction(), which does
# not read them.
tf.app.flags.DEFINE_integer('concurrency', 1,
                            'maximum number of concurrent inference requests')
tf.app.flags.DEFINE_string('server', '', 'PredictionService host:port')
tf.app.flags.DEFINE_string('work_dir', '/tmp', 'Working directory. ')
FLAGS = tf.app.flags.FLAGS

def do_prediction(hostport, work_dir, concurrency,
                  url_sequence='/url/a /url/b ', timeout=5.0):
  """Send one Predict request to the 'predict_url' model and return the reply.

  Args:
    hostport: Address of the PredictionService as 'host:port'.
    work_dir: Unused; kept so existing callers keep working.
    concurrency: Unused; kept so existing callers keep working.
    url_sequence: Whitespace-separated URLs sent as the single 'x_strings'
      value. Defaults to the string that was previously hard-coded.
    timeout: RPC deadline in seconds.

  Returns:
    The response message returned by stub.Predict().

  Raises:
    ValueError: If `hostport` has no ':' separator or the port is not an
      integer.
  """
  # Split on the last ':' only, so a host that itself contains ':' does not
  # break the unpacking.
  host, port = hostport.rsplit(':', 1)
  channel = implementations.insecure_channel(host, int(port))
  stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)

  request = predict_pb2.PredictRequest()
  request.model_spec.name = 'predict_url'
  request.model_spec.signature_name = 'predict_url'
  request.inputs['x_strings'].dtype = types_pb2.DT_STRING
  # string_val is a repeated bytes field: a text string has to be encoded
  # before it is appended, otherwise Python 3 rejects it.
  request.inputs['x_strings'].string_val.append(
      tf.compat.as_bytes(url_sequence))

  return stub.Predict(request, timeout)

def main(_):
  """Query the prediction server named by --server and print its response.

  Prints a hint and returns without sending anything when --server is empty.

  Args:
    _: Unused argv passed by tf.app.run().
  """
  if not FLAGS.server:
    print('please specify server host:port')
    return
  prediction = do_prediction(FLAGS.server, FLAGS.work_dir,
                            FLAGS.concurrency)
  # The response is a protobuf message, not a percentage, so no trailing '%'
  # is printed after it. The one-element tuple keeps the formatting safe for
  # any response type.
  print('\nPrediction from url_classify_client: %s' % (prediction,))


# Script entry point: tf.app.run() parses the flags declared above and then
# calls main().
if __name__ == '__main__':
  tf.app.run()




每当我用占位符替换x_test时,来自服务请求的响应总是相同的:



Prediction from url_classify_client: outputs {
  key: "scores"
  value {
dtype: DT_FLOAT
tensor_shape {
  dim {
    size: 1
  }
}
float_val: 4.0
  }
}




更新:下面是我修改后的模型导出脚本:

import argparse
import sys
import os

import re
import numpy as np
import pandas
import tensorflow as tf
import url_datasets
from tensorflow.contrib.learn.python.learn.preprocessing import text
from tensorflow.python.framework import dtypes

# Command-line flags. `model_version` becomes the last path component of the
# SavedModel export directory built in main(); `work_dir` is declared but not
# read anywhere in the visible part of this script.
tf.app.flags.DEFINE_integer('model_version', 1, 'version number of the model.')
tf.app.flags.DEFINE_string('work_dir', '/tmp/suc', 'Working directory.')
FLAGS = tf.app.flags.FLAGS

# Length passed to VocabularyProcessor as the maximum document length.
MAX_DOCUMENT_LENGTH = 40
# Used both as the embedding dimension and as the GRU cell size in rnn_model().
EMBEDDING_SIZE = 40
# Vocabulary size; main() overwrites this global before the model is built.
n_words = 0
# Depth of the one-hot labels and number of units of the output dense layer.
MAX_LABEL = 50
WORDS_FEATURE = 'words'  # Name of the input words feature.
# A token is a (possibly empty) run of '/', 'a'-'z', '_' or '-' that is
# immediately followed by one whitespace character.
# NOTE(review): because the trailing whitespace is mandatory, the last token
# of a string that does not end in whitespace is not matched -- confirm that
# this is intended for inputs such as '/url/a /url/b'.
TOKENIZER_RE = re.compile(r'([/a-z_-]*)\s')

def tokenizer(iterator):
  """Lazily tokenize every string produced by `iterator`.

  Args:
    iterator: Iterable of strings to split.

  Yields:
    One list of the TOKENIZER_RE matches per input string.
  """
  find_tokens = TOKENIZER_RE.findall
  for raw_string in iterator:
    tokens = find_tokens(raw_string)
    yield tokens

def estimator_spec_for_softmax_classification(
    logits, labels, mode):
  """Build the EstimatorSpec of a softmax classifier for the given mode.

  Args:
    logits: Unnormalised class scores; argmax is taken over axis 1.
    labels: Class ids; one-hot encoded with depth MAX_LABEL for the loss.
      Not read in PREDICT mode.
    mode: One of the tf.estimator.ModeKeys values.

  Returns:
    A tf.estimator.EstimatorSpec carrying
      * PREDICT: predictions 'class' (argmax) and 'prob' (softmax),
      * TRAIN: the cross-entropy loss and an Adam training op,
      * otherwise: the loss and an 'accuracy' evaluation metric.
  """
  modes = tf.estimator.ModeKeys
  class_ids = tf.argmax(logits, 1)

  if mode == modes.PREDICT:
    outputs = {
        'class': class_ids,
        'prob': tf.nn.softmax(logits)
    }
    return tf.estimator.EstimatorSpec(mode=mode, predictions=outputs)

  # Both TRAIN and EVAL need the cross-entropy against one-hot labels.
  xent = tf.losses.softmax_cross_entropy(
      onehot_labels=tf.one_hot(labels, MAX_LABEL, 1, 0), logits=logits)

  if mode == modes.TRAIN:
    minimize_op = tf.train.AdamOptimizer(learning_rate=0.1).minimize(
        xent, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode, loss=xent, train_op=minimize_op)

  metrics = {
      'accuracy': tf.metrics.accuracy(labels=labels, predictions=class_ids)
  }
  return tf.estimator.EstimatorSpec(
      mode=mode, loss=xent, eval_metric_ops=metrics)

def updatePrediction(prediction):
  """Append the string form of `prediction` to /tmp/ai/prediction.txt.

  The file is opened in append mode, so earlier content is kept and no
  separator is written between successive predictions.

  Args:
    prediction: Any object; `str(prediction)` is what gets written.
  """
  # The context manager closes the handle even when write() raises, which the
  # explicit open()/close() pair did not guarantee.
  with open("/tmp/ai/prediction.txt", "a") as prediction_log:
    prediction_log.write(str(prediction))

def customTestFn(input, vocab_processor):
  """Parse serialized examples and run them through the vocabulary processor.

  Args:
    input: Passed to tf.parse_example as the serialized examples; main() hands
      in a string placeholder. (The name shadows the `input` builtin.)
    vocab_processor: Object whose fit_transform() is applied to the parse
      result.

  Returns:
    A NumPy array built from whatever fit_transform() yields.
  """
  # One string value is expected under the 'x_strings' feature key.
  feature_configs = {'x_strings': tf.FixedLenFeature(shape=[1], dtype=tf.string)}
  tf_example = tf.parse_example(input, feature_configs)
  # NOTE(review): tf.parse_example presumably returns a dict of graph tensors,
  # not Python strings, so the tokenizer would iterate over the dict's keys
  # rather than the request text -- confirm. If so, the returned array cannot
  # depend on what is later fed into the placeholder.
  # NOTE(review): fit_transform (rather than transform) is applied to
  # inference input here -- verify that refitting is intended.
  x_transform_test = vocab_processor.fit_transform(tf_example)
  return np.array(list(x_transform_test))

def rnn_model(features, labels, mode):
  """Estimator model_fn: embedding -> GRU -> dense softmax classifier.

  Args:
    features: Dict holding the token ids under WORDS_FEATURE (assumed to be
      laid out as (batch, sequence) -- TODO confirm against the input_fn).
    labels: Class ids, forwarded unchanged to the spec builder.
    mode: A tf.estimator.ModeKeys value.

  Returns:
    The EstimatorSpec produced by estimator_spec_for_softmax_classification.
  """
  token_ids = features[WORDS_FEATURE]
  embedded = tf.contrib.layers.embed_sequence(
      token_ids, vocab_size=n_words, embed_dim=EMBEDDING_SIZE)

  # static_rnn wants a Python list with one tensor per step, so split axis 1.
  steps = tf.unstack(embedded, axis=1)
  gru = tf.contrib.rnn.GRUCell(EMBEDDING_SIZE)

  # Only the final state is used; the per-step outputs are discarded.
  final_state = tf.contrib.rnn.static_rnn(gru, steps, dtype=tf.float32)[1]

  return estimator_spec_for_softmax_classification(
      logits=tf.layers.dense(final_state, MAX_LABEL, activation=None),
      labels=labels,
      mode=mode)


def main(_):
  """Train the URL classifier and export a SavedModel for serving.

  Steps: load the data from /tmp/ai/demo, build the vocabulary, train the
  Estimator for 100 steps, run one prediction pass, then write a SavedModel
  under <last command-line argument>/<FLAGS.model_version> with a
  'predict_url' signature and a default classification signature.

  NOTE(review): the signature outputs are built from `y_predicted`, a NumPy
  array computed here at export time, so the exported 'scores' tensor looks
  like a constant that cannot change with the request. The test features are
  also produced by customTestFn() in Python before serving starts. Exporting
  through the Estimator with a serving_input_receiver_fn is the usual way to
  get a servable graph -- confirm.

  Args:
    _: Unused argv passed by tf.app.run().
  """
  urls = url_datasets.load_urls('/tmp/ai/demo')
  # This session is the one handed to the SavedModelBuilder at the end.
  sess = tf.InteractiveSession()
  # Used as the signature input and as the argument of customTestFn().
  serialized_tf_example = tf.placeholder(tf.string, name='x_strings')

  global n_words
  # presumably column 1 holds the input URL sequence and column 0 the label
  # URL -- verify against url_datasets.load_urls.
  x_train = pandas.Series(urls.train.data[:,1])
  labels = pandas.Series(urls.train.data[:,0])
  y_train = pandas.Series(urls.train.target)
  y_test = pandas.Series(urls.test.target)
  vocab_processor = text.VocabularyProcessor(MAX_DOCUMENT_LENGTH,
               min_frequency=0,
               tokenizer_fn=tokenizer)
  # The vocabulary is fitted on the label column first, then on the inputs.
  # NOTE(review): VocabularyProcessor.fit appears to freeze the vocabulary,
  # in which case the fit_transform below would add no new tokens -- confirm.
  vocab_processor.fit(labels)
  x_transform_train = vocab_processor.fit_transform(x_train)
  x_train = np.array(list(x_transform_train))

  print(vocab_processor.vocabulary_._mapping)

  # rnn_model() reads this global as the embedding vocabulary size.
  n_words = len(vocab_processor.vocabulary_)
  vocab_dict = vocab_processor.vocabulary_._mapping
  model_fn = rnn_model

  classifier = tf.estimator.Estimator(model_fn=model_fn)

  # Full-batch training: every step sees the whole training set, in order.
  train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={WORDS_FEATURE: x_train},
        y=y_train,
        batch_size=len(x_train),
        num_epochs=None,
        shuffle=False)
  classifier.train(input_fn=train_input_fn, steps=100)

  # The export base directory is taken from the last command-line argument.
  export_path_base = sys.argv[-1]
  export_path = os.path.join(
      tf.compat.as_bytes(export_path_base),
      tf.compat.as_bytes(str(FLAGS.model_version)))
  print('Exporting trained model to', export_path)
  builder = tf.saved_model.builder.SavedModelBuilder(export_path)

  # NOTE(review): this initialises variables of the graph attached to `sess`;
  # whether the Estimator's trained variables live in that graph is not
  # visible here -- verify.
  sess.run(tf.global_variables_initializer())

  # customTestFn() is evaluated right here, while the input_fn is being
  # built, and its NumPy result becomes the test features.
  test_input_fn = tf.estimator.inputs.numpy_input_fn(
      x={WORDS_FEATURE: customTestFn(serialized_tf_example, vocab_processor)},
      y=y_test,
      num_epochs=1,
      shuffle=False)
  predictions = classifier.predict(input_fn=test_input_fn)

  # Consuming the generator runs the prediction; the result is a NumPy array.
  y_predicted = np.array(list(p['class'] for p in predictions))
  # id -> token lookup, used to print the predicted class as a URL string.
  inverseDictionary = dict(zip(vocab_dict.values(), vocab_dict.keys()))
  for prediction in y_predicted:
       print("prediction:"+inverseDictionary[int(prediction)])
       updatePrediction(prediction)
       print("--> %s" % prediction)
  tensor_info_x = tf.saved_model.utils.build_tensor_info(serialized_tf_example)
  # NOTE(review): convert_to_tensor on a NumPy array yields a constant, so
  # this output does not depend on `serialized_tf_example`.
  tensor_info_y = tf.saved_model.utils.build_tensor_info(tf.convert_to_tensor(y_predicted, tf.float32))

  # Same placeholder as tensor_info_x, wrapped a second time.
  classification_inputs = tf.saved_model.utils.build_tensor_info(
      serialized_tf_example)

  classification_signature = (
      tf.saved_model.signature_def_utils.build_signature_def(
          inputs={
              tf.saved_model.signature_constants.CLASSIFY_INPUTS:
                 classification_inputs
          },
          outputs={
              tf.saved_model.signature_constants.CLASSIFY_OUTPUT_SCORES:
                 tensor_info_y
          },
          method_name=tf.saved_model.signature_constants.CLASSIFY_METHOD_NAME))

  # Signature addressed by the client: input 'x_strings', output 'scores'.
  prediction_signature = (
      tf.saved_model.signature_def_utils.build_signature_def(
          inputs={'x_strings': tensor_info_x},
          outputs={'scores': tensor_info_y},
          method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME))

  legacy_init_op = tf.group(tf.tables_initializer(), name='legacy_init_op')
  builder.add_meta_graph_and_variables(
      sess, [tf.saved_model.tag_constants.SERVING],
      signature_def_map={
          'predict_url':
              prediction_signature,
          tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
              classification_signature,
      },
      legacy_init_op=legacy_init_op)
  builder.save()

  print('Done exporting!')


if __name__ == '__main__':
  # The options of this script are declared with tf.app.flags at module level;
  # tf.app.run() parses the command line and then invokes main(). No argparse
  # parser is built here because nothing ever called parse_args() on one, so
  # its --test_with_fake_data / --bow_model options could not take effect.
  tf.app.run()

每次发起 gRPC 请求时,我都会得到相同的响应:

Prediction from url_classify_client: outputs {
  key: "scores"
  value {
    dtype: DT_FLOAT
    tensor_shape {
      dim {
        size: 1
      }
    }
    float_val: 7.0
  }
}

这与模型导出期间得到的预测值完全相同,因此我认为 "y_predicted" 在服务(serving)期间并没有被重新计算……

我不确定如何在服务期间调试它(我是用 Bazel 构建并运行服务的)。在设置了以下环境变量之后:

export TF_CPP_MIN_VLOG_LEVEL=0
export GRPC_VERBOSITY=DEBUG
export GRPC_TRACE=all

我在日志中收到以下消息(当我执行gRPC请求时):

'PRI * HTTP/2.0....SM......$..................................@................@.:scheme.http@.:method.POST..:path-/tensorflow.serving.PredictionService/Predict@.:authority.localhost:9000@.te.trailers@.content-type.application/grpc@.user-agent8grpc-python/1.4.0 grpc-c/4.0.0 (osx; chttp2; gregarious)@.grpc-accept-encoding.identity,deflate,gzip..grpc-timeout.5S...............B..........=....predict_url..predict_url....x_strings....B./url/a /url/b ..........................'

我的导出命令

python tensorflow/tensorflow/examples/learn/saved_simple_url_classification.py /tmp/saved_rnn

我的服务命令

bazel-bin/tensorflow_serving/model_servers/tensorflow_model_server --port=9000 --model_name=predict_url --model_base_path=/tmp/saved_rnn/ --logtostderr --logdir logs &> grpc_log

我的gRPC命令

python url_classify_client.py --server=localhost:9000

0 个答案:

没有答案