我正在尝试训练LSTM网络,但是损失总是NaN
。我的数据在0到1之间缩放,因此我认为爆炸梯度不是问题。是什么引起了这个问题?
import pandas as pd
from collections import defaultdict as dd
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Conv1D
from tensorflow.keras.layers import MaxPooling1D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import TimeDistributed
from tensorflow.keras.layers import Flatten
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras import optimizers
import matplotlib.pyplot as plt
gpus = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(gpus[0], True)
data = pd.read_csv(r"D:\ML_Datasets\Signal\Myo\s1\data.csv")
EPOCHS = 100
BATCH_SIZE = 16
MAX_LEN = 1000
samples_dict = dd(lambda: {})
def normalize(v): # Scale to [0, 1]
norm = (v - np.min(v))/np.ptp(v)
return norm
def get_repetition(y):
samples_dict[str(y.loc[y.index[0], "stimulus"])][str(y.loc[y.index[0], "repetition"])] = y.iloc[:, 2:].to_numpy()
def get_stimulus(x):
if x.loc[x.index[0], "stimulus"] == 0:
return
x.groupby("repetition").apply(get_repetition)
data.groupby("stimulus").apply(get_stimulus)
X = []
Y = []
for stim in samples_dict.keys():
for rep in samples_dict[stim].keys():
vals = np.array(samples_dict[stim][rep])
vals = normalize(vals)
pad = np.zeros(shape=(MAX_LEN - vals.shape[0], 16))
X.append(np.concatenate((vals, pad), axis=0))
Y.append(int(stim))
X = np.array(X) #(72, 1000, 16)
Y = np.array(Y) #(72,)
print(np.min(X), np.max(X))
model = Sequential()
model.add(LSTM(100, input_shape=X[0].shape))
model.add(Dense(12, activation="softmax"))
optimizer = optimizers.Adam()
model.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy", metrics=['accuracy'])
print(model.summary())
# fit model
history = model.fit(X, Y, epochs=EPOCHS, batch_size=BATCH_SIZE)