I developed a text classification model with Keras, and I saved the model using the following code:
model.save('E:/New Notes/Python/My BOW Model/my_model.h5')
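For context, this is roughly the kind of training and saving step the model comes from. It is a simplified sketch of the standard Keras bag-of-words recipe, not my exact script; the training documents, labels, and layer sizes below are placeholders:

from keras.models import Sequential
from keras.layers import Dense
from keras.preprocessing.text import Tokenizer
import numpy as np

# training documents, already cleaned and filtered against the vocabulary
train_docs = ['good appeal movie', 'boring weak plot']
ytrain = np.array([1, 0])

# fit the tokenizer on the training documents so it learns the word index
tokenizer = Tokenizer()
tokenizer.fit_on_texts(train_docs)

# encode the documents as bag-of-words frequency vectors
Xtrain = tokenizer.texts_to_matrix(train_docs, mode='freq')

# simple dense network for binary sentiment classification
model = Sequential()
model.add(Dense(50, input_shape=(Xtrain.shape[1],), activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(Xtrain, ytrain, epochs=10, verbose=0)

# save the trained model to disk
model.save('E:/New Notes/Python/My BOW Model/my_model.h5')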
The file was saved successfully. Now I want to load the model and classify new text.
Here is my code:
from keras.models import load_model
from string import punctuation
from nltk.corpus import stopwords
from keras.preprocessing.text import Tokenizer
model = load_model('E:/New Notes/My BOW Model/my_model.h5')
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# load doc into memory
def load_doc(filename):
    # open the file as read only
    file = open(filename, 'r')
    # read all text
    text = file.read()
    # close the file
    file.close()
    return text
# load the vocabulary
vocab_filename = 'E:/New Notes/My BOW Model/vocab.txt'
vocab = load_doc(vocab_filename)
vocab = vocab.split()
vocab = set(vocab)
# turn a doc into clean tokens
def clean_doc(doc):
    # split into tokens by white space
    tokens = doc.split()
    # remove punctuation from each token
    table = str.maketrans('', '', punctuation)
    tokens = [w.translate(table) for w in tokens]
    # remove remaining tokens that are not alphabetic
    tokens = [word for word in tokens if word.isalpha()]
    # filter out stop words
    stop_words = set(stopwords.words('english'))
    tokens = [w for w in tokens if not w in stop_words]
    # filter out short tokens
    tokens = [word for word in tokens if len(word) > 1]
    return tokens
def predict_sentiment(review, vocab, tokenizer, model):
    # clean
    tokens = clean_doc(review)
    # filter by vocab
    tokens = [w for w in tokens if w in vocab]
    # convert to line
    line = ' '.join(tokens)
    # encode
    encoded = tokenizer.texts_to_matrix([line], mode='freq')
    # prediction
    classname = model.predict(encoded, verbose=0)
    return round(classname[0,0])
# classify a review as negative (0) or positive (1)
# test positive text
text = 'The movie has a good appeal.'
tokenizer = Tokenizer()
print(predict_sentiment(text, vocab, tokenizer, model))
# test negative text
text = 'I dont like the movie at all.'
print(predict_sentiment(text, vocab, tokenizer, model))
After running the code above, I get this error:
AttributeError: 'Tokenizer' object has no attribute 'word_index'.
What am I doing wrong? Thanks in advance.