I want to use the Keras library to train an RNN-based sorting network. But when I train the network on the CPU, its memory usage keeps increasing, and the process soon crashes. I used Python's gc module to observe the sizes of all live objects, but every object seems to stay the same size the whole time. I also found that the memory usage grows faster when I make the batches larger. The network is as follows.
Code:
#!/usr/bin/env python
from collections import defaultdict
from gc import get_objects
from keras.models import Sequential
from keras.layers.core import Activation, RepeatVector, TimeDistributedDense, Dropout, Dense
from keras.layers import recurrent
import numpy as np
from keras import backend as K
import sys
#from data import batch_gen, encode
RNN = recurrent.LSTM
# Neural networks take input as vectors, so we have to convert integers to
# vectors using one-hot encoding.
# This function encodes a given integer sequence into an RNN-compatible
# format (one-hot representation).
def encode(X, seq_len, vocab_size):
    x = np.zeros((len(X), seq_len, vocab_size), dtype=np.float32)
    for ind, batch in enumerate(X):
        for j, elem in enumerate(batch):
            x[ind, j, elem] = 1
    return x
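# For example (illustrative values): encode([[2, 0, 1]], 3, 4) returns a
# (1, 3, 4) array whose j-th row is all zeros except for a 1 at column
# X[0][j].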
# This is a generator function that can generate an infinite stream of inputs for training
def batch_gen(batch_size=32, seq_len=6, max_no=10):
    # Randomly generate a batch of integer sequences (X) and their sorted
    # counterpart (Y)
    x = np.zeros((batch_size, seq_len, max_no), dtype=np.float32)
    y = np.zeros((batch_size, seq_len, max_no), dtype=np.float32)
    while True:
        # Generate a batch of input
        X = np.random.randint(max_no, size=(batch_size, seq_len))
        Y = np.sort(X, axis=1)
        for ind, batch in enumerate(X):
            for j, elem in enumerate(batch):
                x[ind, j, elem] = 1
        for ind, batch in enumerate(Y):
            for j, elem in enumerate(batch):
                y[ind, j, elem] = 1
        yield x, y
        x.fill(0.0)
        y.fill(0.0)
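# Note: the same x and y buffers are reused and zeroed after every yield,
# so the generator itself allocates its two arrays only once.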
# Global parameters
batch_size = 32
seq_len = 6
max_no = 10
# Initializing model
model = Sequential()
# This is the encoder RNN (we use a variant of RNN called LSTM, because
# plain RNNs suffer from long-term dependency issues)
model.add(RNN(max_no, input_shape=(seq_len, max_no)))
# Dropout to enhance the RNN's generalization capacity
model.add(Dropout(0.25))
# At this point the RNN has generated a summary vector of the sequence; to feed it to the decoder we need to repeat it as many times as the length of the output sequence
model.add(RepeatVector(seq_len))
# Decoder RNN, which will return the output sequence
model.add(RNN(128, return_sequences=True))
# Adding a linear layer at each time step
model.add(TimeDistributedDense(128, max_no))
# Dropout for regularization, then a softmax non-linearity on top of the
# linear layer at each time step, since the output at each time step is
# supposed to be a probability distribution over the max. number of
# integers in the sequence
model.add(Dropout(0.5))
model.add(Activation('softmax'))
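# Shape summary for this configuration: input (batch, seq_len, max_no)
# -> encoder LSTM summary vector (batch, max_no)
# -> RepeatVector (batch, seq_len, max_no)
# -> decoder LSTM (batch, seq_len, 128)
# -> TimeDistributedDense + softmax (batch, seq_len, max_no)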
# Since this is a multiclass classification task, cross-entropy loss is used.
# The optimizer is Adam, a particular instance of adaptive-learning-rate
# gradient descent methods
model.compile(loss='categorical_crossentropy', optimizer='adam')
# Now the training loop: we'll sample input batches from the generator
# function written previously and feed them to the RNN for learning.
# Using gc.get_objects() to observe live Python objects
before = defaultdict(int)
for i in get_objects():
    before[type(i)] += 1

for ind, (X, Y) in enumerate(batch_gen(batch_size, seq_len, max_no)):
    # Rebuild the per-type object counts from scratch each iteration so the
    # diff against the previous iteration is meaningful
    after = defaultdict(int)
    for i in get_objects():
        after[type(i)] += 1
    print(after)
    print([(k, after[k] - before[k]) for k in after if after[k] - before[k]])
    before = after
    loss, acc = model.train_on_batch(X, Y, accuracy=True)
    if ind % 100 == 99:
        print(ind, 'loss=', loss, 'acc=', acc)
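For reference, gc.get_objects() only sees Python-level objects, so memory held in the backend's C buffers would not show up in these counts. Below is a minimal sketch of how the process's resident memory can be tracked per step instead (this assumes Linux, where the standard-library resource module reports ru_maxrss in kilobytes; the loop body is a placeholder for the actual train_on_batch call):

import resource

def peak_rss_kb():
    # Peak resident set size of this process so far (KB on Linux)
    return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

prev = peak_rss_kb()
for step in range(1000):
    # ... model.train_on_batch(X, Y, accuracy=True) would go here ...
    cur = peak_rss_kb()
    if cur > prev:
        print('step', step, 'peak RSS grew by', cur - prev, 'KB to', cur, 'KB')
        prev = cur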