Memory usage keeps increasing when I train an RNN with Keras

Date: 2017-05-23 09:08:54

Tags: python deep-learning keras

I want to use the Keras library to train an RNN-based sorting network. However, when I train the network on the CPU, the memory usage keeps increasing until the process crashes soon afterwards. I used Python's gc module to watch the size of all live objects, but the sizes seem to stay the same the whole time. I also found that the larger I set things, the faster the memory usage grows. The network is as follows:

#!/usr/bin/env python

from collections import defaultdict
from gc import get_objects
from keras.models import Sequential
from keras.layers.core import Activation, RepeatVector, TimeDistributedDense, Dropout, Dense
from keras.layers import recurrent
import numpy as np
from keras import backend as K
import sys
#from data import batch_gen, encode
RNN = recurrent.LSTM

# Neural networks take input as vectors, so we have to convert the integers to vectors using one-hot encoding
# This function will encode a given integer sequence into RNN compatible format (one-hot representation)

def encode(X,seq_len, vocab_size):
    x = np.zeros((len(X),seq_len, vocab_size), dtype=np.float32)
    for ind,batch in enumerate(X):
        for j, elem in enumerate(batch):
            x[ind, j, elem] = 1
    return x
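# Example (illustration, not part of the original script):
#   encode([[2, 0, 1]], seq_len=3, vocab_size=4) returns an array of shape (1, 3, 4)
#   whose first time step is the one-hot vector [0., 0., 1., 0.]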


# This is a generator function which can generate an infinite stream of inputs for training

def batch_gen(batch_size=32, seq_len=6, max_no=10):
# Randomly generate a batch of integer sequences (X) and its sorted
# counterpart (Y)
    x = np.zeros((batch_size, seq_len, max_no), dtype=np.float32)
    y = np.zeros((batch_size, seq_len, max_no), dtype=np.float32)

    while True:
    # Generates a batch of input
        X = np.random.randint(max_no, size=(batch_size, seq_len))

        Y = np.sort(X, axis=1)

        for ind,batch in enumerate(X):
            for j, elem in enumerate(batch):
                x[ind, j, elem] = 1

        for ind,batch in enumerate(Y):
            for j, elem in enumerate(batch):
                y[ind, j, elem] = 1

        yield x, y
        x.fill(0.0)
        y.fill(0.0)
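# Example usage (illustration, not part of the original script):
#   gen = batch_gen(batch_size=32, seq_len=6, max_no=10)
#   x, y = next(gen)           # both arrays have shape (32, 6, 10)
#   np.argmax(y, axis=-1)      # each row is the sorted version of the same row in np.argmax(x, axis=-1)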

# global parameters.
batch_size=32
seq_len = 6
max_no = 10

# Initializing model 
model = Sequential()

# This is the encoder RNN (we use a variant of RNN called LSTM, because plain RNNs suffer from long-term dependency issues)
model.add(RNN(max_no, input_shape=(seq_len, max_no)))

# Dropout to enhance the RNN's generalization capacity
model.add(Dropout(0.25))

# At this point the RNN has produced a summary vector of the sequence; to feed it to the decoder we repeat that vector once per output time step
model.add(RepeatVector(seq_len))

# Decoder RNN, which will return output sequence 
model.add(RNN(128, return_sequences=True))

# Adding linear layer at each time step 
model.add(TimeDistributedDense(128,max_no))

# Adding a non-linearity on top of the linear layer at each time step: since the output at each
# time step is supposed to be a probability distribution over the possible integers, we add a softmax
model.add(Dropout(0.5))
model.add(Activation('softmax'))

# Since this is a multiclass classification task, crossentropy loss is used. The optimizer is Adam, an adaptive-learning-rate variant of gradient descent
model.compile(loss='categorical_crossentropy', optimizer='adam')#,
          #metrics=['accuracy'])

# Now the training loop: we'll sample input batches from the generator function written previously and feed them to the RNN for learning

# using gc.get_objects to observe the live objects
before = defaultdict(int)
after = defaultdict(int)
for i in get_objects():
    before[type(i)] += 1

for ind,(X,Y) in enumerate(batch_gen(batch_size, seq_len, max_no)):
    for i in get_objects():
        after[type(i)] += 1
    print(after)
    print([(k, after[k] - before[k]) for k in after if after[k] - before[k]])  
    before = after   

    loss, acc = model.train_on_batch(X, Y, accuracy=True)

    if ind % 100 == 99:
        print(ind, 'loss=',loss, 'acc=', acc)
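
For reference, here is a minimal sketch of one way to watch the actual process memory per batch, rather than only the gc object counts used above. This is not part of the original script and assumes a Unix-like system, where resource.getrusage reports the peak resident set size (kilobytes on Linux):

import gc
import resource
from collections import Counter

def memory_snapshot():
    # Count live Python objects by type and read the peak resident set size so far.
    counts = Counter(type(obj).__name__ for obj in gc.get_objects())
    rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    return counts, rss

# Inside the training loop one could compare successive snapshots, e.g.:
#   prev_counts, prev_rss = memory_snapshot()
#   loss, acc = model.train_on_batch(X, Y, accuracy=True)
#   counts, rss = memory_snapshot()
#   print('peak RSS growth (KB):', rss - prev_rss)
#   print('new objects by type:', dict(counts - prev_counts))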

0 Answers:

No answers yet.