In the very simple example below, why does the hidden state consist of 2 tensors? From my understanding, shouldn't it just be a single tensor of size 20?
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
rnn = nn.LSTM(input_size=10, hidden_size=20)
input = Variable(torch.randn(50, 1, 10))  # seq_len x batch x input_size
output, hn = rnn(input)  # output: per-timestep outputs; hn: the hidden state I'm asking about
print(hn)
Output:
(tensor([[[ 0.0891, -0.0040, -0.0161, 0.1959, -0.0739, -0.0193, -0.0766, 0.0063, 0.0570, -0.1149, 0.0069, 0.1713, -0.1526, 0.0510, -0.1189, -0.0073, -0.0418, 0.0637, -0.1596, 0.0528]]]), tensor([[[ 0.1572, -0.0137, -0.0400, 0.2685, -0.1569, -0.0492, -0.1998, 0.0157, 0.1495, -0.2752, 0.0128, 0.3332, -0.2082, 0.1847, -0.5665, -0.0138, -0.0818, 0.0992, -0.4049, 0.1082]]]))
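To make the observation concrete, here is a minimal sketch (reusing the rnn and input defined above) that unpacks hn and prints the shape of each element; the names first and second are just my own labels for the two tuple members:

# Sketch: hn prints as a tuple of two tensors, so unpack it and inspect the shapes
first, second = hn
print(first.size())   # torch.Size([1, 1, 20])
print(second.size())  # torch.Size([1, 1, 20])

So both elements have shape (1, 1, 20), and neither is simply a size-20 tensor.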