我打算使用 Keras 的 model.predict_classes 进行预测
from numpy import array
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.layers import Embedding
# generate a sequence from a language model
def generate_seq(model, tokenizer, max_length, seed_text, n_words):
    """Extend ``seed_text`` with ``n_words`` words predicted by ``model``.

    Args:
        model: Trained Keras model whose ``predict`` returns one row of
            per-class scores for every input row.
        tokenizer: Fitted Keras ``Tokenizer``; its ``texts_to_sequences`` and
            ``word_index`` are used to encode text and decode predictions.
        max_length: Length the encoded text is pre-padded to before it is
            passed to the model.
        seed_text: Text the generation starts from.
        n_words: Number of words to append.

    Returns:
        ``seed_text`` followed by the predicted words, separated by spaces.
        A prediction with no entry in ``word_index`` contributes an empty
        word (only the separating space is appended).
    """
    in_text = seed_text
    # generate a fixed number of words
    for _ in range(n_words):
        # encode the text as integer
        encoded = tokenizer.texts_to_sequences([in_text])[0]
        # pre-pad sequences to a fixed length
        encoded = pad_sequences([encoded], maxlen=max_length, padding='pre')
        # Take the highest-scoring class of each row. This is what
        # Sequential.predict_classes returned for a softmax output, but
        # it also works on Keras releases that no longer ship that method.
        yhat = model.predict(encoded, verbose=1).argmax(axis=-1)
        # yhat is an array, so it has to be converted before concatenation
        print('yhat = ' + str(yhat))
        predicted_index = int(yhat[0])
        # map predicted word index to word
        out_word = ''
        for word, index in tokenizer.word_index.items():
            if index == predicted_index:
                out_word = word
                break
        # append to input
        in_text += ' ' + out_word
    return in_text
# source text: one space-separated sequence of tokens per entry. The tokens
# appear to be French job titles, with multi-word titles written as a single
# token joined by 'O' (e.g. chefOdeOprojet) -- TODO confirm with the author.
# Every entry ends with an explicit "\n" escape followed by a real line break,
# so splitting the string on '\n' yields an empty string between entries.
data = """apprenti electricien chefOdeOprojet \n
soudeur chefOdeOsection directeurOusine\n
mecanicien chefOdeOsection directeurOadjoint\n
ingenieur chefOdeOprojet directeurOadjoint directeurOusine\n
ingenieur chefOdeOprojet \n
apprenti soudeur chefOdeOsection chefOdeOprojet\n
ingenieurOetude chefOdeOprojet\n
ingenieurOetude manager chefOdeOprojet directeurOdepartement\n
apprenti gestionOproduction manager directeurOdepartement\n
ingenieurOetude commercial\n
soudeur ingenieurOetude manager directeurOadjoint\n
ingenieurOetude directeurOdepartement directeurOusine\n
apprenti soudeur\n
agentOsecurite chefOsecurite\n
apprenti mecanicien ouvrier manager\n
commercial directeurOadjoint\n
agentOsecurite chefOsecurite\n
directeurOusine retraite\n
ouvrier manager\n
ingenieur vente\n
secretaire comptable\n
comptable chefOcomptable\n
chefOcomptable directeurOdepartement\n
assistant secretaire comptable\n
assistant comptable\n
assistant secretaire commercial\n
commercial chefOdeOprojet\n
commercial vente chefOdeOprojet\n
electricien chefOdeOsection\n
apprenti ouvrier chefOdeOsection\n"""
# Train a next-word model on `data`: every window of three consecutive words
# on a line becomes one sample (two input words -> one target word).

# integer encode sequences of words
tokenizer = Tokenizer()
# The vocabulary has to be learned first; an unfitted tokenizer has an empty
# word_index and encodes every text to an empty sequence.
tokenizer.fit_on_texts([data])
encoded = tokenizer.texts_to_sequences([data])[0]
# retrieve vocabulary size (+1 because word indices start at 1; 0 is padding)
vocab_size = len(tokenizer.word_index) + 1
print('Vocabulary Size: %d' % vocab_size)
# encode 2 words -> 1 word
sequences = list()
for line in data.split('\n'):
    encoded = tokenizer.texts_to_sequences([line])[0]
    # lines with fewer than three words produce no window
    for i in range(2, len(encoded)):
        sequence = encoded[i-2:i+1]
        sequences.append(sequence)
print('Total Sequences: %d' % len(sequences))
# pad sequences
max_length = max([len(seq) for seq in sequences])
sequences = pad_sequences(sequences, maxlen=max_length, padding='pre')
print('Max Sequence Length: %d' % max_length)
# split into input and output elements
sequences = array(sequences)
X, y = sequences[:,:-1],sequences[:,-1]
y = to_categorical(y, num_classes=vocab_size)
# define model
model = Sequential()
model.add(Embedding(vocab_size, 10, input_length=max_length-1))
# The recurrent layer reduces the (timesteps, features) embedding output to a
# single vector per sample, so the softmax output lines up with the one-hot
# target `y`.
# NOTE(review): the recorded run summary also lists a Dropout layer after the
# LSTM; its rate is not shown anywhere, so it is not reproduced here.
model.add(LSTM(50))
model.add(Dense(vocab_size, activation='softmax'))
# compile network
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# fit network
model.fit(X, y, epochs=500, verbose=0)
# evaluate model
print(generate_seq(model, tokenizer, max_length-1, 'electricien secretaire', 1))
Vocabulary Size: 24
Total Sequences: 20
Max Sequence Length: 3
Layer (type) Output Shape Param #
embedding_2 (Embedding) (None, 2, 10) 240
lstm_2 (LSTM) (None, 50) 12200
dropout_2 (Dropout) (None, 50) 0
dense_2 (Dense) (None, 24) 1224
Total params: 13,664
Trainable params: 13,664
Non-trainable params: 0
1/1 [==============================] - 0s 86ms/step
yhat = [1]
electricien secretaire chefodeoprojet
答案 0 :(得分:0)
# Fit the model, then predict on the held-out data and turn the per-class
# outputs into 0/1 labels.
# NOTE(review): fragment of a method -- `self`, `model` and `test_data` are
# defined outside this excerpt.
print("Fitting model...")
model.fit(np.asarray(self.X), np.asarray(self.Y), epochs=200, batch_size=10)
print("Model fitting complete.")
# reshape the test set to (number of test rows, 1, 128) before predicting
self.TEST = np.asarray(self.TEST).reshape((test_data.shape[0], 1, 128))
print("Predicting on Test (unseen) data...")
predictions = model.predict(self.TEST)
# Sigmoid predictions: every output above 0.5 is marked as 1, the rest stay 0
labels = np.zeros(predictions.shape)
labels[predictions > 0.5] = 1
print("Prediction labels for unseen: " + str(labels))
Prediction labels for unseen:
[[ 0. 1. 0. 0.]
[ 0. 1. 0. 0.]
[ 0. 1. 0. 0.]
[ 0. 1. 0. 0.]
[ 0. 1. 0. 0.]
[ 0. 0. 1. 0.]
[ 0. 0. 1. 0.]
[ 0. 0. 1. 0.]]
每一行表示一个样本的分类结果;值为 1 的位置(索引)表示该样本所属的类别(A、B、C、D)。