I've made a simple prediction model with Keras and a bag of words, based on code I found in tutorials. Loading the dataset and training finished without problems, and accuracy is around 88%.
The dataset has two columns, text and tag (e.g. "some text, a"). How can I test the trained model on data that is not in the dataset, something like model.predict(some_text)?
This is a sample of the dataset (text, tag):
Sconto,n
Trg Vinodolskog zakona 5,a
I would also like to save the model so I don't have to train it every time I run the script. Is the right approach to put model.save('my_model.h5') at the end of the script? And how do I then load the model and make predictions on data that is not in the dataset?
import logging
import pandas as pd
import numpy as np
from numpy import random
import gensim
import nltk
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
from nltk.corpus import stopwords
import re
from bs4 import BeautifulSoup
df = pd.read_csv('dataset3.csv')
df = df[pd.notnull(df['tag'])]
df.head(10)
def print_plot(index):
    example = df[df.index == index][['tekst', 'tag']].values[0]
    if len(example) > 0:
        print(example[0])
        print('Tag:', example[1])
print_plot(0)
REPLACE_BY_SPACE_RE = re.compile('[/(){}\[\]\|@,;]')
BAD_SYMBOLS_RE = re.compile('[^0-9a-z #+_]')
STOPWORDS = set(stopwords.words('english'))
def clean_text(text):
    """
    text: a string
    return: modified initial string
    """
    text = BeautifulSoup(text, "lxml").text # HTML decoding
    text = text.lower() # lowercase text
    text = REPLACE_BY_SPACE_RE.sub(' ', text) # replace REPLACE_BY_SPACE_RE symbols by space in text
    text = BAD_SYMBOLS_RE.sub('', text) # delete symbols which are in BAD_SYMBOLS_RE from text
    text = ' '.join(word for word in text.split() if word not in STOPWORDS) # delete stopwords from text
    return text
df['tekst'] = df['tekst'].apply(clean_text)
print_plot(0)
import itertools
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import LabelBinarizer, LabelEncoder
from sklearn.metrics import confusion_matrix
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.preprocessing import text, sequence
from keras import utils
train_size = int(len(df) * .7)
print ("Train size: %d" % train_size)
print ("Test size: %d" % (len(df) - train_size))
train_posts = df['tekst'][:train_size]
train_tags = df['tag'][:train_size]
test_posts = df['tekst'][train_size:]
test_tags = df['tag'][train_size:]
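# Turn each post into a fixed-length bag-of-words vector over the most frequent training words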
max_words = 1000
tokenize = text.Tokenizer(num_words=max_words, char_level=False)
tokenize.fit_on_texts(train_posts) # only fit on train
x_train = tokenize.texts_to_matrix(train_posts)
x_test = tokenize.texts_to_matrix(test_posts)
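# Encode the string tags as integer class indices, then as one-hot vectors for the softmax output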
encoder = LabelEncoder()
encoder.fit(train_tags)
y_train = encoder.transform(train_tags)
y_test = encoder.transform(test_tags)
num_classes = np.max(y_train) + 1
y_train = utils.to_categorical(y_train, num_classes)
y_test = utils.to_categorical(y_test, num_classes)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)
print('y_train shape:', y_train.shape)
print('y_test shape:', y_test.shape)
batch_size = 32
epochs = 2
# Build the model
model = Sequential()
model.add(Dense(512, input_shape=(max_words,)))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_split=0.1)
score = model.evaluate(x_test, y_test,
                       batch_size=batch_size, verbose=1)
print('Test accuracy:', score[1])
Answer 0 (score: 2)
After you finish training the model, you can save its weights to disk with model.save_weights(path).
You can then load those weights back with model.load_weights(path) into a model with the same architecture.
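For example, a minimal sketch of the weights-only approach (the file name is just an example):
model.save_weights('bow_model_weights.h5')   # at the end of the training script
# ... later: rebuild exactly the same Sequential model, compile it, then
model.load_weights('bow_model_weights.h5')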
If you also want to save the model architecture, use the more general model.save(path),
and then load the model back with
from keras.models import load_model
model = load_model(path)
Once the model and its weights are restored, you can evaluate it to check its accuracy, or make predictions on new data:
prediction = model.predict(x_test)
loss, metrics = model.evaluate(x_test, y_test)
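To make predictions on text that is not in the dataset, the new text has to go through exactly the same preprocessing as the training data. A minimal sketch, assuming the tokenize, encoder and clean_text objects from your script are still in scope (or are recreated the same way):
new_texts = ["some text that is not in the dataset"]
new_texts = [clean_text(t) for t in new_texts]    # same cleaning as the training data
x_new = tokenize.texts_to_matrix(new_texts)       # same bag-of-words vectorizer
pred = model.predict(x_new)                       # one row of class probabilities per text
predicted_tag = encoder.inverse_transform([np.argmax(pred[0])])
print(predicted_tag[0])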
Answer 1 (score: 1)
Yes, according to the Keras Documentation FAQ page, you simply call model.save(filepath).
If you want to load an already saved model, use keras.models.load_model(filepath).
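For example (the file name here is only an illustration):
# at the end of the training script
model.save('my_model.h5')

# later, in a separate script or session
import keras
model = keras.models.load_model('my_model.h5')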