How to save memory when training an LSTM model with varying data shapes in TensorFlow

Time: 2019-09-03 06:44:34

Tags: tensorflow lstm

I built a model with LSTM cells using the code below. Its two inputs, data1 and data2, have shape [None, 5]: sometimes [3, 5], sometimes [13, 5], sometimes [80, 5], and at most [50, 5].

While training, I found that the memory in use keeps growing as the training steps increase, i.e. the more batches have been trained, the more memory is used, and eventually an out-of-memory error occurs after training 200 or 300 batches.

Now, I know how to avoid this (the ever-growing memory usage during training) by removing "@tf.function", but I don't understand what the difference is between having "@tf.function" and not having "@tf.function".
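
By default a tf.function traces and caches a separate concrete graph for every input shape it has not seen before, which is presumably why memory grows when the sequence length changes from batch to batch. A minimal sketch that shows this behaviour, assuming TF 2.3+ where experimental_get_tracing_count() is available (echo_sum is a toy function of my own, not the train_step from the code below):

import tensorflow as tf

@tf.function
def echo_sum(x):
    # The body is irrelevant; only the tracing behaviour matters here.
    return tf.reduce_sum(x)

for length in (3, 13, 50, 80):
    echo_sum(tf.zeros([length, 5]))
    # Each previously unseen input shape triggers a fresh trace,
    # so the count grows with every new sequence length.
    print(length, echo_sum.experimental_get_tracing_count())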

import tensorflow as tf

# datas1[i] and datas2[i] are sequences of 5-dimensional feature vectors;
# the sequence length varies from sample to sample, so each has shape [T_i, 5].
datas1 = [
        [[2.3, 3.3, 2, 4.3, 2.3], [3., 1.2, 3., 22.1, 23.2]],
        [[2.3, 3.3, 2, 4.3, 2.3], [3., 1.2, 3., 22.1, 23.2], [2.3, 3.3, 2, 4.3, 2.3], [3., 1.2, 3., 22.1, 23.2]],
        [[2, 3, 2, 3, 2], [3, 2, 3, 2, 2]],
        [[2.3, 3.3, 2, 4.3, 2.3], [3., 1.2, 3., 22.1, 23.2], [2.3, 3.3, 2, 4.3, 2.3]],
        ...
        ]
datas2 = [
        [[2.3, 3.3, 2, 4.3, 2.3], [3., 1.2, 3., 22.1, 23.2], [2.3, 3.3, 2, 4.3, 2.3]],
        [[2.3, 3.3, 2, 4.3, 2.3], [3., 1.2, 3., 22.1, 23.2], [3., 1.2, 3., 22.1, 23.2]],
        [[2, 3, 2, 3, 2], [3, 2, 3, 2, 2]],
        [[2.3, 3.3, 2, 4.3, 2.3], [3., 1.2, 3., 22.1, 23.2], [2.3, 3.3, 2, 4.3, 2.3], [3, 2, 3, 2, 2]],
        ...
        ]
labels = [[0.0], [1.0], [1.0], [0.0], ...]
L = len(labels)

# Yield one (data1, data2, label) triple at a time; element shapes differ
# across samples because the sequence lengths differ.
def generator():
    for i in range(L):
        yield datas1[i], datas2[i], labels[i]

trains = tf.data.Dataset.from_generator(generator, (tf.float32, tf.float32, tf.float16))

class ABC(tf.keras.Model):
    def __init__(self):
        super(ABC, self).__init__()
        # Two independent LSTM encoders, one per input sequence.
        self.encoder1 = tf.keras.models.Sequential([
                        tf.keras.layers.LSTM(32, return_sequences=True),
                        tf.keras.layers.LSTM(16)])
        self.encoder2 = tf.keras.models.Sequential([
                        tf.keras.layers.LSTM(32, return_sequences=True),
                        tf.keras.layers.LSTM(16)])
        self.dense = tf.keras.layers.Dense(1, activation='sigmoid')

    def call(self, data1, data2):
        # Encode each sequence to a fixed-size vector, concatenate, classify.
        en1 = self.encoder1(data1)
        en2 = self.encoder2(data2)
        en12 = tf.keras.layers.concatenate([en1, en2])
        return self.dense(en12)

model = ABC()
loss_object = tf.keras.losses.BinaryCrossentropy()
optimizer = tf.keras.optimizers.Adam()

@tf.function
def train_step(data1, data2, labels):
    # One gradient step: forward pass, loss, backward pass, weight update.
    with tf.GradientTape() as tape:
        predictions = model(data1, data2)
        loss = loss_object(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

# Train for 5 epochs with batch size 1; every batch may have a different
# sequence length.
for i in range(5):
    for data1, data2, label in trains.batch(1):
        train_step(data1, data2, label)
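
For reference, a hedged workaround sketch under the assumption that per-shape retracing is the cause (the name train_step_fixed and the exact TensorSpec shapes below are my own, not part of the original code): passing an explicit input_signature with None for the batch and time dimensions makes TensorFlow trace one graph and reuse it for every sequence length.

# Same training step, but with a fixed input signature so that a single
# traced graph is reused for all sequence lengths.
@tf.function(input_signature=[
        tf.TensorSpec(shape=[None, None, 5], dtype=tf.float32),   # data1: [batch, time, 5]
        tf.TensorSpec(shape=[None, None, 5], dtype=tf.float32),   # data2: [batch, time, 5]
        tf.TensorSpec(shape=[None, 1], dtype=tf.float16)])        # labels: [batch, 1]
def train_step_fixed(data1, data2, labels):
    with tf.GradientTape() as tape:
        predictions = model(data1, data2)
        loss = loss_object(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

for i in range(5):
    for data1, data2, label in trains.batch(1):
        train_step_fixed(data1, data2, label)

Removing "@tf.function", by contrast, runs the step eagerly, so no graphs are traced or cached at all, which is the difference between the two versions of the code.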

0 Answers:

No answers yet.