我的LSTM代码给出了平坦的预测,不知道出了什么问题。有人可以帮忙看看吗?

时间:2018-10-21 08:10:53

标签: python machine-learning lstm pytorch rnn

代码如下。我怀疑问题出在 mylstm 类中,但一直没能找到具体原因……输入很简单,只有 7 列数据。

我尝试打印了所有张量,但仍然没有发现问题所在。感谢您的帮助!

class mylstm(nn.Module):
    """Single-layer LSTM followed by a linear head emitting one value per sample.

    The network steps through the first ``T - 1`` time steps of each input
    window and maps the final hidden state to a scalar prediction.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden and cell states.
        T: Window length; ``T - 1`` time steps are fed to the LSTM.
        logger: Logger object; stored on the instance but not used by this class.
    """

    def __init__(self, input_size, hidden_size, T, logger):
        super(mylstm, self).__init__()

        self.T = T
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.logger = logger

        # Honour the caller-supplied feature count; it used to be hard-coded
        # to 7, which silently ignored ``input_size``.
        self.lstm_layer = nn.LSTM(input_size = input_size, hidden_size = hidden_size)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, input_data):
        """Run the LSTM over the window and project the last hidden state.

        Args:
            input_data: Tensor indexed as ``(batch, time, feature)`` that
                holds at least ``T - 1`` time steps.

        Returns:
            Tensor of shape ``(batch, 1)``.
        """
        hidden = self.init_hidden(input_data)
        cell = self.init_hidden(input_data)

        # Compacting the weights once per forward pass is sufficient.
        self.lstm_layer.flatten_parameters()
        for t in range(self.T - 1):
            # nn.LSTM expects (seq_len, batch, feature); feed one step at a time.
            step = input_data[:, t, :].unsqueeze(0)
            _, (hidden, cell) = self.lstm_layer(step, (hidden, cell))

        # hidden is (1, batch, hidden_size); drop the layer axis for the head.
        return self.fc(hidden[0])

    def init_hidden(self, x):
        """Return a zero state of shape ``(1, batch, hidden_size)`` matching ``x``'s type."""
        return Variable(x.data.new(1, x.size(0), self.hidden_size).zero_())


# Train the model
class rnn:
    """Trainer that fits ``mylstm`` on sliding windows of a CSV time series.

    Every column of the CSV is used as an input feature and the ``rtm_spp``
    column is the regression target. A sample that starts at row ``k`` uses
    rows ``k .. k + T - 2`` as input; during training its target is
    ``y[k + T]``.

    Args:
        file_data: Path of the CSV file to load.
        logger: Logger used for progress messages.
        input_size: Feature count forwarded to ``mylstm``.
        hidden_size: LSTM hidden width forwarded to ``mylstm``.
        T: Window length; each sample holds ``T - 1`` rows.
        learning_rate: Initial Adam learning rate.
        batch_size: Number of windows per optimisation step.
        parallel: Wrap the model in ``nn.DataParallel`` when true.
        debug: Load only the first 100 rows when true.
        train_size: Number of leading rows reserved for training; clamped to
            the number of rows actually loaded.
    """

    def __init__(self, file_data, logger, input_size = 7, hidden_size = 64, T = 10,
                 learning_rate = 0.01, batch_size = 128, parallel = True, debug = False,
                 train_size = 20000):
        self.T = T
        dat = pd.read_csv(file_data, nrows = 100 if debug else None)
        self.logger = logger
        self.logger.info("Shape of data: %s.\nMissing in data: %s.", dat.shape, dat.isnull().sum().sum())

        # All columns, the target included, serve as input features.
        # NOTE(review): the inputs are not scaled here; large raw feature
        # values may saturate the LSTM gates and could explain flat
        # predictions -- consider standardising with training-set statistics.
        self.X = dat.values
        self.y = np.array(dat.rtm_spp)
        self.batch_size = batch_size

        self.lstm1 = mylstm(input_size = input_size,
                            hidden_size = hidden_size,
                            T = T, logger = logger)

        if parallel:
            self.lstm1 = nn.DataParallel(self.lstm1)

        trainable = filter(lambda p: p.requires_grad, self.lstm1.parameters())
        self.lstm1_optimizer = optim.Adam(params = trainable, lr = learning_rate)

        # A training split larger than the data (e.g. debug mode loads only
        # 100 rows) would index past the end of the arrays.
        n_rows = self.X.shape[0]
        if train_size > n_rows:
            self.logger.warning("Requested train_size %d exceeds the %d available rows; using %d.",
                                train_size, n_rows, n_rows)
        self.train_size = min(train_size, n_rows)

        # Centre the target on the training-set mean.
        self.y = self.y - np.mean(self.y[:self.train_size])
        self.logger.info("Training size: %d.", self.train_size)

    def _build_windows(self, starts):
        """Stack the ``T - 1``-row slices of ``self.X`` that begin at each index in ``starts``."""
        windows = np.zeros((len(starts), self.T - 1, self.X.shape[1]))
        for k, start in enumerate(starts):
            windows[k, :, :] = self.X[start : start + self.T - 1, :]
        return windows

    def train(self, n_epochs = 10):
        """Train the model for ``n_epochs`` passes over the shuffled training windows.

        Per-iteration losses are stored in ``self.iter_losses`` and their
        per-epoch means in ``self.epoch_losses``.

        Raises:
            ValueError: If the training split is too short to hold one window
                plus its target.
        """
        # Valid window starts: the target y[k + T] must stay inside the split.
        n_samples = self.train_size - self.T - 1
        if n_samples <= 0:
            raise ValueError("train_size must be larger than T + 1 to form a training sample")

        # Count iterations from the number of samples actually drawn, so no
        # trailing empty batch (NaN loss) is ever produced.
        iter_per_epoch = int(np.ceil(n_samples * 1. / self.batch_size))
        self.logger.info("Iterations per epoch: %3.3f ~ %d.", n_samples * 1. / self.batch_size, iter_per_epoch)
        self.iter_losses = np.zeros(n_epochs * iter_per_epoch)
        self.epoch_losses = np.zeros(n_epochs)

        self.loss_func = nn.MSELoss()

        n_iter = 0

        for i in range(n_epochs):
            perm_idx = np.random.permutation(n_samples)
            for b, j in enumerate(range(0, n_samples, self.batch_size)):
                batch_idx = perm_idx[j:(j + self.batch_size)]
                X = self._build_windows(batch_idx)
                y_target = self.y[batch_idx + self.T]

                loss = self.train_iteration(X, y_target)
                self.iter_losses[i * iter_per_epoch + b] = loss

                n_iter += 1

                # Decay the learning rate by 10% every 10000 iterations.
                if n_iter % 10000 == 0:
                    for param_group in self.lstm1_optimizer.param_groups:
                        param_group['lr'] = param_group['lr'] * 0.9

            self.epoch_losses[i] = np.mean(self.iter_losses[i * iter_per_epoch : (i + 1) * iter_per_epoch])
            if i % 10 == 0:
                self.logger.info("Epoch %d, loss: %3.3f.", i, self.epoch_losses[i])

    def train_iteration(self, X, y_target):
        """Run one optimisation step and return the batch loss as a Python number.

        Args:
            X: NumPy array of input windows, indexed ``(batch, time, feature)``.
            y_target: NumPy array with one target value per window.
        """
        self.lstm1_optimizer.zero_grad()
        y_pred = self.lstm1(Variable(torch.from_numpy(X).type(torch.FloatTensor)))
        y_true = Variable(torch.from_numpy(y_target).type(torch.FloatTensor))
        # Force both tensors to (batch, 1) so the loss never broadcasts,
        # including for a batch of a single sample.
        y_true = y_true.view(-1, 1)
        y_pred = y_pred.view(-1, 1)
        loss = self.loss_func(y_pred, y_true)
        loss.backward()
        self.lstm1_optimizer.step()
        # ``loss.data[0]`` fails on 0-dim tensors in current PyTorch; keep it
        # only as a fallback for releases that predate ``Tensor.item``.
        return loss.item() if hasattr(loss, "item") else loss.data[0]

    def predict(self, on_train = False):
        """Return model predictions as a 1-D NumPy array.

        Args:
            on_train: When true, predict for the ``train_size - T + 1`` windows
                that start inside the training split; otherwise predict one
                value for every row after the training split.
        """
        if on_train:
            y_pred = np.zeros(self.train_size - self.T + 1)
            offset = 0
        else:
            y_pred = np.zeros(self.X.shape[0] - self.train_size)
            # Test windows end on the row just before the predicted position.
            offset = self.train_size - self.T

        for i in range(0, len(y_pred), self.batch_size):
            stop = min(i + self.batch_size, len(y_pred))
            X = self._build_windows(np.arange(i, stop) + offset)
            input_data = Variable(torch.from_numpy(X).type(torch.FloatTensor))
            y_pred[i:stop] = self.lstm1(input_data).data.cpu().numpy()[:, 0]
        return y_pred


# Build the trainer on 'L.csv' without DataParallel, using a smaller
# learning rate than the class default.
model = rnn(
    file_data = 'L.csv',
    logger = logger,
    learning_rate = .001,
    parallel = False,
)

# Fit for 1000 epochs.
model.train(n_epochs = 1000)

# Predict on the rows that follow the training split (on_train defaults to False).
y_pred = model.predict()

1 个答案:

答案 0 :(得分:0)

如果您能把代码精简为仍可重现问题的最小示例,会更容易得到帮助。要求别人调试 200 多行代码,负担实在太重。如果您能用一个非常简单的神经网络模型(而不是当前模型)来重现这个问题,会有更多人愿意阅读您的代码并帮您找出问题所在。