During training of my LSTM network, the loss/accuracy went from 0.75 / 85% at epoch 42 to 4.97 / 17% at epoch 44.
Why does this happen?
I currently have only 1500 training examples and am overfitting. Could this be a cause?
For context, I'm using Keras with an LSTM network to predict reactions to Slack messages. My training data is label-encoded sentences, and I'm predicting the reaction class, which is just a one-hot representation over all possible classes.
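To illustrate that label pipeline, here is a minimal self-contained sketch (the reaction names are made up for illustration, not my real data):

import numpy as np
from sklearn.preprocessing import LabelEncoder

reactions = ['thumbsup', 'joy', 'thumbsup', 'heart']
encoder = LabelEncoder()
encoded = encoder.fit_transform(reactions)        # classes sorted alphabetically -> [2, 1, 2, 0]
one_hot = np.eye(len(encoder.classes_))[encoded]  # one row per example
print(one_hot)
# [[0. 0. 1.]
#  [0. 1. 0.]
#  [0. 0. 1.]
#  [1. 0. 0.]]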
Here is my model in Keras:
# create the model
embedding_vector_length = 128
model = Sequential()
model.add(Embedding(max_words, embedding_vector_length, input_length=max_message_length))
model.add(LSTM(1024))
model.add(Dense(classes, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
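For scale, here is a rough parameter count for that model (a back-of-the-envelope sketch; classes is assumed to be 20 for illustration, the real value comes from the data):

# Rough parameter count for the model above
max_words, embedding_vector_length, lstm_units, classes = 10000, 128, 1024, 20  # classes assumed

embedding_params = max_words * embedding_vector_length       # 1,280,000
lstm_params = 4 * (embedding_vector_length * lstm_units
                   + lstm_units * lstm_units + lstm_units)   # 4,722,688
dense_params = lstm_units * classes + classes                # 20,500
print(embedding_params + lstm_params + dense_params)         # 6,023,188

That is roughly 6 million parameters against ~1500 examples, which is consistent with my overfitting suspicion.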
The loss and accuracy do eventually recover by around epoch 100. Is this something to be worried about?
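One way I could tell overfitting apart from an optimization blow-up is to hold out a validation split and watch both loss curves; gradient clipping is a common mitigation for sudden loss spikes in LSTMs. A minimal sketch, assuming the model, X_train, and y_train built in the full script below (clipnorm=1.0 is an assumption, not something my code already uses):

from keras.optimizers import Adam

# Recompile with gradient clipping: sudden loss spikes in LSTMs are often
# caused by exploding gradients, which clipnorm bounds (assumed mitigation)
model.compile(loss='categorical_crossentropy',
              optimizer=Adam(clipnorm=1.0),
              metrics=['accuracy'])

# Diverging train/validation loss suggests overfitting; both curves
# spiking together suggests an optimization problem instead
history = model.fit(X_train, y_train,
                    validation_split=0.2,  # hold out 20% for validation
                    epochs=100, batch_size=64)
print(history.history['loss'])
print(history.history['val_loss'])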
# coding: utf-8
# In[1]:
import pandas as pd
import re
import numpy as np
from keras.preprocessing.text import Tokenizer
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.layers import Embedding
from keras.preprocessing import sequence
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import os
pd.options.display.max_columns = 999
pd.options.display.max_colwidth = 999
np.random.seed(7)
# In[2]:
raw_data = pd.DataFrame()
for file in os.listdir('data/random'):
    temp_df = pd.read_json(path_or_buf='data/random/' + file, orient='values', dtype=False)
    raw_data = pd.concat([raw_data, temp_df])
for file in os.listdir('data/company-wide'):
    temp_df = pd.read_json(path_or_buf='data/company-wide/' + file, orient='values', dtype=False)
    raw_data = pd.concat([raw_data, temp_df])
for file in os.listdir('data/politics'):
    temp_df = pd.read_json(path_or_buf='data/politics/' + file, orient='values', dtype=False)
    raw_data = pd.concat([raw_data, temp_df])
# In[3]:
raw_data.shape
# In[4]:
# Keep only messages that have reactions and non-empty text
data = raw_data.loc[raw_data['reactions'].notnull() & (raw_data['text'] != '')][['reactions', 'text']]
# In[5]:
data.shape
# In[6]:
def extractEmojiName(x):
    # Return the name of the reaction with the highest count;
    # max_count must be updated inside the loop or the last reaction wins
    max_count = 0
    result = ''
    for emoji in x:
        if emoji['count'] > max_count:
            max_count = emoji['count']
            result = emoji['name']
    return result

def removeUrls(x):
    # Strip Slack-style URL tokens such as <http://...>; the non-greedy .*?
    # stops at the first '>' instead of swallowing the rest of the message
    line = re.sub(r"<https?.*?>", "", x)
    return line

def removeUsername(x):
    # Strip Slack-style user mentions such as <@U123ABC>
    line = re.sub(r"<@.*?>", "", x)
    return line
# In[7]:
data['reactions_parsed'] = data['reactions'].apply(extractEmojiName)
# In[8]:
data['text'] = data['text'].apply(removeUrls)
data['text'] = data['text'].apply(removeUsername)
# In[9]:
max_words = 10000
tokenizer = Tokenizer(num_words=max_words)  # nb_words was renamed num_words in Keras 2
tokenizer.fit_on_texts(data['text'])
text_vectors = tokenizer.texts_to_sequences(data['text'])
data['text_vector'] = text_vectors
# In[10]:
encoder = LabelEncoder()
data['reactions_encoded'] = encoder.fit_transform(data['reactions_parsed'])
# In[11]:
data
# In[12]:
classes = len(data['reactions_parsed'].unique())
target_vector = data['reactions_encoded'].values
reactions_vector = np.eye(classes)[target_vector]  # one-hot encode the integer labels
data['reactions_vector'] = reactions_vector.tolist()
# In[13]:
max_message_length = data['text_vector'].apply(len).max()
# In[14]:
X_train, X_test, y_train, y_test = train_test_split(text_vectors, reactions_vector, test_size=.2, stratify=reactions_vector)
# In[15]:
X_train = np.array(X_train)
X_test = np.array(X_test)
y_train = np.array(y_train)
y_test = np.array(y_test)
# In[17]:
X_train = sequence.pad_sequences(X_train, maxlen=max_message_length)
X_test = sequence.pad_sequences(X_test, maxlen=max_message_length)
# In[18]:
# create the model
embedding_vecor_length = 128
model = Sequential()
model.add(Embedding(max_words, embedding_vecor_length, input_length=max_message_length))
model.add(Dropout(0.2))
model.add(LSTM(1024))
model.add(Dropout(0.2))
model.add(Dense(classes, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
# In[ ]:
model.fit(X_train, y_train, epochs=35, batch_size=64)  # nb_epoch was renamed epochs in Keras 2
# In[ ]:
# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=1)
print("Accuracy: %.2f%%" % (scores[1]*100))
# In[45]:
scores
# In[56]:
def show_predictions(model, X_test, y_test):
    predictions = model.predict(X_test)
    for index, prediction in enumerate(predictions):
        # argmax gives the class index; inverse_transform maps it back to
        # the reaction name (it expects an array, hence the [..] and [0])
        print('Prediction -> ' + encoder.inverse_transform([prediction.argmax()])[0])
        print('Actual -> ' + encoder.inverse_transform([y_test[index].argmax()])[0])
# In[57]:
show_predictions(model, X_test, y_test)
# In[58]:
show_predictions(model, X_train[0:100], y_train[0:100])