I built a model with LSTM cells using the code below. Its two inputs, data1 and data2, each have shape [None, 5]: the sequence length differs from sample to sample, e.g. [3, 5] for one sample, [13, 5] for another, [80, 5] for yet another, and so on.

While training, I found that memory usage keeps growing as the steps accumulate; that is, the more batches I train on, the more memory is used, until an out-of-memory error occurs after 200 or 300 batches.

I now know how to avoid this (the continuously growing memory during training) by removing @tf.function, but I don't understand why there is a difference between training with @tf.function and without it.
import tensorflow as tf
datas1 = [
    [[2.3, 3.3, 2, 4.3, 2.3], [3., 1.2, 3., 22.1, 23.2]],
    [[2.3, 3.3, 2, 4.3, 2.3], [3., 1.2, 3., 22.1, 23.2], [2.3, 3.3, 2, 4.3, 2.3], [3., 1.2, 3., 22.1, 23.2]],
    [[2, 3, 2, 3, 2], [3, 2, 3, 2, 2]],
    [[2.3, 3.3, 2, 4.3, 2.3], [3., 1.2, 3., 22.1, 23.2], [2.3, 3.3, 2, 4.3, 2.3]],
    ...
]
datas2 = [
    [[2.3, 3.3, 2, 4.3, 2.3], [3., 1.2, 3., 22.1, 23.2], [2.3, 3.3, 2, 4.3, 2.3]],
    [[2.3, 3.3, 2, 4.3, 2.3], [3., 1.2, 3., 22.1, 23.2], [3., 1.2, 3., 22.1, 23.2]],
    [[2, 3, 2, 3, 2], [3, 2, 3, 2, 2]],
    [[2.3, 3.3, 2, 4.3, 2.3], [3., 1.2, 3., 22.1, 23.2], [2.3, 3.3, 2, 4.3, 2.3], [3, 2, 3, 2, 2]],
    ...
]
labels = [[0.0], [1.0], [1.0], [0.0], ...]
L = len(labels)
def generator():
    for i in range(L):
        yield datas1[i], datas2[i], labels[i]

# Every element has its own sequence length, so batch shapes vary from step to step.
trains = tf.data.Dataset.from_generator(generator, (tf.float32, tf.float32, tf.float16))
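To make the varying shapes concrete, here is a quick way to print the shapes of a few elements (take(3) is just an arbitrary peek, not part of the training code):

for d1, d2, y in trains.take(3):
    # e.g. (2, 5) (3, 5) (1,) -- the time dimension differs per element
    print(d1.shape, d2.shape, y.shape)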
class ABC(tf.keras.Model):
    def __init__(self):
        super(ABC, self).__init__()
        # Two independent LSTM encoders, one per input sequence.
        self.encoder1 = tf.keras.models.Sequential([
            tf.keras.layers.LSTM(32, return_sequences=True),
            tf.keras.layers.LSTM(16)])
        self.encoder2 = tf.keras.models.Sequential([
            tf.keras.layers.LSTM(32, return_sequences=True),
            tf.keras.layers.LSTM(16)])
        self.dense = tf.keras.layers.Dense(1, activation='sigmoid')

    def call(self, data1, data2):
        en1 = self.encoder1(data1)
        en2 = self.encoder2(data2)
        # Concatenate the two encodings and classify.
        en12 = tf.keras.layers.concatenate([en1, en2])
        return self.dense(en12)
model = ABC()
loss_object = tf.keras.losses.BinaryCrossentropy()
optimizer = tf.keras.optimizers.Adam()

# One optimisation step; it is this tf.function-decorated version that leaks memory.
@tf.function
def train_step(data1, data2, labels):
    with tf.GradientTape() as tape:
        predictions = model(data1, data2)
        loss = loss_object(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

for i in range(5):
    for data1, data2, label in trains.batch(1):
        train_step(data1, data2, label)
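For what it's worth, my current guess is that @tf.function traces a new graph for every previously unseen input shape, and since almost every batch here has a different sequence length, the traced graphs would pile up step after step. Below is a minimal sketch of that idea, pinning an input_signature so that only one trace should ever be built; the TensorSpec shapes, and the assumption that this is the right fix, are mine, so please correct me if the real explanation is different:

@tf.function(input_signature=[
    tf.TensorSpec(shape=[None, None, 5], dtype=tf.float32),  # data1: (batch, time, 5), time varies
    tf.TensorSpec(shape=[None, None, 5], dtype=tf.float32),  # data2: (batch, time, 5), time varies
    tf.TensorSpec(shape=[None, 1], dtype=tf.float16),        # labels: (batch, 1)
])
def train_step(data1, data2, labels):
    # Same body as above; the decorator now fixes the signature, so a new
    # sequence length should reuse the single traced graph instead of
    # triggering another trace.
    with tf.GradientTape() as tape:
        predictions = model(data1, data2)
        loss = loss_object(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

If retracing is indeed the cause, this version should keep memory flat in the same training loop, but I would still like to understand what exactly @tf.function keeps alive for each trace.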