我正在尝试使用tensorflow和tensorlayer在python中构建一个聊天机器人。我的代码如下:
from sklearn.feature_extraction.text import CountVectorizer
from nltk.tokenize import word_tokenize
import os
import time
import re
import tensorflow as tf
import tensorlayer as tl
import cPickle as pickle
FILE_DIR = os.path.dirname(os.path.realpath(__file__))
PAD_TOKEN = '<PAD>'
START_TOKEN = '<START>'
END_TOKEN = '<END>'
UNK_TOKEN = '<UNK>'
PAD_ID = 0
START_ID = 1
END_ID = 2
UNK_ID = 3
STARTING_VOCAB = {PAD_TOKEN: PAD_ID, START_TOKEN: START_ID, END_TOKEN: END_ID, UNK_TOKEN: UNK_ID}
_DIGIT_RE = re.compile(br"\d")
class Chatbot(object):
def __init__(self, embedding_dim, n_layers=3):
self.embedding_dim = embedding_dim
self.n_layers = n_layers
self.w2idx = STARTING_VOCAB
self.idx2w = {}
self.encode_seqs = None
self.decode_seqs = None
self.net = None
self.net_rnn = None
self.y = None
@staticmethod
def load():
with open(os.path.join(location, 'object.pkl'), 'rb') as pickle_file:
obj = pickle.load(pickle_file)
obj.encode_seqs = tf.placeholder(dtype=tf.int64, shape=[1, None], name="encode_seqs")
obj.decode_seqs = tf.placeholder(dtype=tf.int64, shape=[1, None], name="decode_seqs")
obj.net, obj.net_rnn = Chatbot.model(obj.encode_seqs, obj.decode_seqs, obj.idx2w, obj.embedding_dim, obj.n_layers, is_train=False, reuse=True)
obj.y = tf.nn.softmax(obj.net.outputs)
new_saver = tf.train.import_meta_graph(os.path.join(location, 'my-model.meta'))
new_saver.restore(sess, tf.train.latest_checkpoint(location))
return obj
@staticmethod
def model(encode_seqs, decode_seqs, idx2w, embedding_dim, n_layers, is_train=True, reuse=False):
with tf.variable_scope("model", reuse=reuse):
# for chatbot, you can use the same embedding layer,
# for translation, you may want to use 2 seperated embedding layers
with tf.variable_scope("embedding") as vs:
net_encode = tl.layers.EmbeddingInputlayer(inputs=encode_seqs,
vocabulary_size=len(idx2w),
embedding_size=embedding_dim,
name='seq_embedding')
vs.reuse_variables()
tl.layers.set_name_reuse(True)
net_decode = tl.layers.EmbeddingInputlayer(inputs=decode_seqs,
vocabulary_size=len(idx2w),
embedding_size=embedding_dim,
name='seq_embedding')
net_rnn = tl.layers.Seq2Seq(net_encode,
net_decode,
cell_fn=tf.contrib.rnn.BasicLSTMCell,
n_hidden=embedding_dim,
initializer=tf.random_uniform_initializer(-0.1, 0.1),
encode_sequence_length=tl.layers.retrieve_seq_length_op2(encode_seqs),
decode_sequence_length=tl.layers.retrieve_seq_length_op2(decode_seqs),
initial_state_encode=None,
dropout=(0.5 if is_train else None),
n_layer=n_layers,
return_seq_2d=True, name='seq2seq')
net_out = tl.layers.DenseLayer(net_rnn, n_units=len(idx2w), act=tf.identity, name='output')
return net_out, net_rnn
def train(self, X_train, y_train, sess, batch_size, n_epochs):
n_step = int(len(X_train) / batch_size)
# Create vocabulary
X_train = [re.sub(_DIGIT_RE, UNK_TOKEN, x.decode('utf-8')) for x in X_train]
y_train = [re.sub(_DIGIT_RE, UNK_TOKEN, x.decode('utf-8')) for x in y_train]
vectorizer = CountVectorizer(tokenizer=word_tokenize)
all_sentences = X_train + y_train
vectorizer.fit_transform(all_sentences)
for k, v in vectorizer.vocabulary_.iteritems():
vectorizer.vocabulary_[k] = v + len(self.w2idx)
self.w2idx.update(vectorizer.vocabulary_)
self.idx2w = dict((v, k) for k, v in self.w2idx.iteritems())
# Transform data from sentences to sequences of ids
for i in range(len(X_train)):
X_train_id_seq, y_train_id_seq = [], []
for w in word_tokenize(X_train[i]):
if w.lower() in self.w2idx:
X_train_id_seq.append(self.w2idx[w.lower()])
else:
X_train_id_seq.append(self.w2idx[UNK_TOKEN])
X_train[i] = X_train_id_seq + [PAD_ID]
for w in word_tokenize(y_train[i]):
if w.lower() in self.w2idx:
y_train_id_seq.append(self.w2idx[w.lower()])
else:
y_train_id_seq.append(self.w2idx[UNK_TOKEN])
y_train[i] = y_train_id_seq + [PAD_ID]
training_encode_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="encode_seqs")
training_decode_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="decode_seqs")
training_target_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="target_seqs")
training_target_mask = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="target_mask")
training_net_out, _ = Chatbot.model(training_encode_seqs, training_decode_seqs, self.idx2w, self.embedding_dim, self.n_layers, is_train=True, reuse=False)
# model for inferencing
self.encode_seqs = tf.placeholder(dtype=tf.int64, shape=[1, None], name="encode_seqs")
self.decode_seqs = tf.placeholder(dtype=tf.int64, shape=[1, None], name="decode_seqs")
self.net, self.net_rnn = Chatbot.model(self.encode_seqs, self.decode_seqs, self.idx2w, self.embedding_dim, self.n_layers, is_train=False, reuse=True)
self.y = tf.nn.softmax(self.net.outputs)
loss = tl.cost.cross_entropy_seq_with_mask(logits=training_net_out.outputs, target_seqs=training_target_seqs,
input_mask=training_target_mask, return_details=False, name='cost')
lr = 0.0001
train_op = tf.train.AdamOptimizer(learning_rate=lr).minimize(loss)
tl.layers.initialize_global_variables(sess)
for epoch in range(n_epochs):
epoch_time = time.time()
# shuffle training data
from sklearn.utils import shuffle
X_train, y_train = shuffle(X_train, y_train, random_state=0)
# train an epoch
total_err, n_iter = 0, 0
for X, Y in tl.iterate.minibatches(inputs=X_train, targets=y_train, batch_size=batch_size, shuffle=False):
step_time = time.time()
X = tl.prepro.pad_sequences(X)
_target_seqs = tl.prepro.sequences_add_end_id(Y, end_id=END_ID)
_target_seqs = tl.prepro.pad_sequences(_target_seqs)
_decode_seqs = tl.prepro.sequences_add_start_id(Y, start_id=START_ID, remove_last=False)
_decode_seqs = tl.prepro.pad_sequences(_decode_seqs)
_target_mask = tl.prepro.sequences_get_mask(_target_seqs)
_, err = sess.run([train_op, loss],
{training_encode_seqs: X,
training_decode_seqs: _decode_seqs,
training_target_seqs: _target_seqs,
training_target_mask: _target_mask})
print("Epoch[%d/%d] step:[%d/%d] loss:%f took:%.5fs" % (
epoch, n_epochs, n_iter, n_step, err, time.time() - step_time))
total_err += err;
n_iter += 1
print("Epoch[%d/%d] averaged loss:%f took:%.5fs" % (epoch, n_epochs, total_err / n_iter,
time.time() - epoch_time))
def save(self):
if not os.path.exists(location):
os.makedirs(location)
saver = tf.train.Saver()
saver.save(sess, os.path.join(location, 'my-model'))
tf.train.write_graph(sess.graph, location, 'my-graph.pbtxt')
self.net = None
self.net_rnn = None
self.y = None
self.encode_seqs = None
self.decode_seqs = None
with open(os.path.join(location, 'object.pkl'), 'wb') as pickle_file:
pickle.dump(self, pickle_file)
我可以完美地训练,我将一个句子列表作为X_train传递,另一个作为y_train传递。我需要帮助的是保存模型,然后再重新加载它以进行培训或测试。我尝试使用pickle,但它给出了一个错误。如何使用tensorflow和tensorlayer在python中保存和加载seq2seq模型?
答案 0 :(得分:0)
zip()
对象不是保存模型的最佳方式。
完成培训后使用
pickle
并加载
saver = tf.train.Saver()
saver.save(sess, model_name)
为了加载它,您必须先为您训练过的模型构建一个等效模型。
要保存图表对象,请尝试使用tf.train.write_graph