Question

对于Tensorflow还是陌生的，我已经使用tensorflow Hight级别API（DNNClassifier）建立了模型

这是我的模特

estimator = tf.estimator.DNNClassifier(
    hidden_units=[4, 4],
    feature_columns=[embedded_text_feature_column],
    n_classes=4,
    optimizer=tf.train.AdagradOptimizer(learning_rate=0.003),
    model_dir= os.getcwd()+'\Model'   
 )

现在一切都还好。

当我尝试加载模型以便可以进行预测时

显示错误。

这是我的预测代码

#Predict with new data
new_samples = np.array(
  predict_test_input_fn 
)


predict_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"x": new_samples },
    num_epochs=1,
    shuffle=False
)

predictions = estimator.predict(input_fn=predict_input_fn)
predicted_classes = [p["classes"] for p in predictions]

print("New Samples, Class Predictions: {}\n".format(predicted_classes))

这是错误

Traceback (most recent call last):
  File "main.py", line 130, in <module>
    predicted_classes = [p["classes"] for p in predictions]
  File "main.py", line 130, in <listcomp>
    predicted_classes = [p["classes"] for p in predictions]
  File "C:\Users\mrjai\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\estimator\estimator.py", line 494, in predict
    input_fn, model_fn_lib.ModeKeys.PREDICT)
  File "C:\Users\mrjai\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\estimator\estimator.py", line 670, in _get_features_from_input_fn
    result = self._call_input_fn(input_fn, mode)
  File "C:\Users\mrjai\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\estimator\estimator.py", line 798, in _call_input_fn
    return input_fn(**kwargs)
  File "C:\Users\mrjai\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\estimator\inputs\numpy_io.py", line 175, in input_fn
    if len(set(v.shape[0] for v in ordered_dict_data.values())) != 1:
  File "C:\Users\mrjai\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\estimator\inputs\numpy_io.py", line 175, in <genexpr>
    if len(set(v.shape[0] for v in ordered_dict_data.values())) != 1:
IndexError: tuple index out of range

如果有人可以帮助我，这就是我的全部代码。（我仍然不知道如何加载模型并使用它来进行预测）

import tensorflow as tf
import tensorflow_hub as hub
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import re
import seaborn as sns
import logging



print("Loading all files from directory ...")
# Load all files from a directory in a DataFrame.
def load_directory_data(directory):
  data = {}
  data["sentence"] = []
  data["tnemitnes"] = []
  print("getting in a loop")
  for file_path in os.listdir(directory):
    with tf.gfile.GFile(os.path.join(directory, file_path), "r") as f:
      print("directory : ",directory)
      print("file path : ",file_path)
      data["sentence"].append(f.read())
      data["tnemitnes"].append(re.match("(\d+)\.txt", file_path).group(1))
  return pd.DataFrame.from_dict(data)

print("merging all files in the training set ...")
# Merge all type of emails examples, add a polarity column and shuffle.
def load_dataset(directory):
  pos_df = load_directory_data(os.path.join("train/br"))
  neg_df = load_directory_data(os.path.join(directory, "train/mi"))
  dos_df = load_directory_data(os.path.join(directory, "train/Brouillons")) #dsd
  nos_df = load_directory_data(os.path.join(directory, "train/favoris")) #dsd
  pos_df["polarity"] = 3
  neg_df["polarity"] = 2
  dos_df["polarity"] = 1
  nos_df["polarity"] = 0
  return pd.concat([pos_df, neg_df, dos_df , nos_df]).sample(frac=1).reset_index(drop=True)

print("Getting the data from files ...")
# Download and process the dataset files.
def download_and_load_datasets():
  train_df = load_dataset(os.path.dirname("train"))
  test_df = load_dataset(os.path.dirname("test"))
  return train_df, test_df


print("configurring all logging output ...")
# Reduce logging output. ERROR
#logging.set_verbosity(tf.logging.INFO)
logging.getLogger().setLevel(logging.INFO)



print("Setting Up the data for the trainning ...")
train_df, test_df = download_and_load_datasets()
train_df.head()


print("Setting Up a Training input on the whole training set with no limit on training epochs ...")
# Training input on the whole training set with no limit on training epochs.
train_input_fn = tf.estimator.inputs.pandas_input_fn(train_df, train_df["polarity"], num_epochs=None, shuffle=True)

print("Setting Up a Prediction on the whole training set ...")
# Prediction on the whole training set.
predict_train_input_fn = tf.estimator.inputs.pandas_input_fn(train_df, train_df["polarity"], shuffle=False)

print("Setting Up a Prediction on the test set ...")
# Prediction on the test set.
predict_test_input_fn = tf.estimator.inputs.pandas_input_fn(test_df, test_df["polarity"], shuffle=False )


print("Removal of punctuation and splitting on spaces from the data ...")
#The module is responsible for preprocessing of sentences (e.g. removal of punctuation and splitting on spaces).
embedded_text_feature_column = hub.text_embedding_column(key="sentence", module_spec="https://tfhub.dev/google/nnlm-en-dim128/1")

print("Setting Up The Classifier ...")
#Estimator : For classification I did use a DNN Classifier


estimator = tf.estimator.DNNClassifier(
    hidden_units=[4, 4],
    feature_columns=[embedded_text_feature_column],
    n_classes=4,
    optimizer=tf.train.AdagradOptimizer(learning_rate=0.003),
    model_dir= os.getcwd()+'\Model')



print("Starting the Training ...")
# Training for 50 steps means 5000 training examples with the default
# batch size. This is roughly equivalent to 5 epochs since the training dataset
# contains less examples.
estimator.train(input_fn=train_input_fn, steps=4);

print("the Training had ended...")

print("setting Up the results ...")
train_eval_result = estimator.evaluate(input_fn=predict_train_input_fn)
test_eval_result = estimator.evaluate(input_fn=predict_test_input_fn)   


print("Showing the results ...")
print("Training set accuracy: {accuracy}".format(**train_eval_result))
print("Test set accuracy: {accuracy}".format(**test_eval_result)) 


#Still having trouble with the prediction

print("Loading new data ...")
pred = load_directory_data(os.path.join("newData"))

print("The data file is ready ...")


#Predict with new data
new_samples = np.array(
  predict_test_input_fn 
)


predict_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"x": new_samples },
    num_epochs=1,
    shuffle=False
)

predictions = estimator.predict(input_fn=predict_input_fn)
predicted_classes = [p["classes"] for p in predictions]

print("New Samples, Class Predictions: {}\n".format(predicted_classes))

部署DNNClassifier模型时遇到麻烦（在预训练的模型上使用预测函数）

0 个答案: