I have the following simple LSTM network:
import torch
import torch.nn as nn

class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim  # number of stacked LSTM layers (3rd arg to nn.LSTM)
        self.rnn = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.batch_size = None  # unused
        self.hidden = None      # unused

    def forward(self, x):
        # Fresh zero-initialized hidden and cell states for every batch
        h0, c0 = self.init_hidden(x)
        out, (hn, cn) = self.rnn(x, (h0, c0))
        # Map only the last time step's output to the output dimension
        out = self.fc(out[:, -1, :])
        return out

    def init_hidden(self, x):
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim)
        c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim)
        return h0, c0
I initialize the model as:
model = LSTMModel(28, 10, 6, 1)
That is, each input instance has 6 time steps, each of dimension 28; the hidden dimension is 10, and the input is mapped to an output dimension of 1.
The training data is prepared in batches of size 16, which means the data passed in the training loop has the following shape:
torch.Size([16, 6, 28])
as given by:
batches[1][0].size()
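As a quick sanity check of these shapes (a minimal sketch, assuming the LSTMModel class above and random stand-in data), a batch shaped [16, 6, 28] should come out of the model as [16, 1]:

# Sketch: verify the expected shapes with random stand-in data
dummy_batch = torch.randn(16, 6, 28)  # (batch, time steps, features), matching batch_first=True
with torch.no_grad():
    out = model(dummy_batch)
print(out.shape)  # expected: torch.Size([16, 1])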
An example input:
tensor([[-0.3674, 0.0347, -0.2169, -0.0821, -0.3673, -0.1773, 1.1840, -0.2669,
-0.4202, -0.1473, -0.1132, -0.4756, -0.3565, 0.5010, 0.1274, -0.1147,
0.2783, 0.0836, -1.3251, -0.8067, -0.6447, -0.7396, -0.3241, 1.3329,
1.3801, 0.8198, 0.6098, 0.0697],
[-0.2710, 0.1596, -0.2524, -0.0821, -0.3673, -0.1773, 0.0302, -0.2099,
-0.4550, 0.1451, -0.4561, -0.5207, -0.5657, -0.5287, -0.2690, -0.1147,
-0.0346, -0.1043, -0.7515, -0.8392, -0.4745, -0.7396, -0.3924, 0.8122,
-0.1624, -1.2198, 0.0326, -0.9306],
[-0.1746, 0.0972, -0.2702, -0.0821, -0.3673, -0.1773, -0.0468, -1.1225,
-0.4480, -0.4397, 0.4011, -1.1073, -1.0536, -0.1855, -0.7502, -0.1147,
-0.0146, -0.1545, -0.1919, -0.1674, 0.0930, -0.7396, 0.8106, 1.1594,
0.4546, -1.2198, -0.5446, -1.2640],
[-0.2710, 0.0660, -0.2524, -0.0821, -0.4210, -0.1773, 1.8251, -0.5236,
-0.4410, -0.7321, 0.4011, -0.6110, -0.2171, 1.1875, -0.2973, -0.1147,
-0.1278, 0.7728, -0.9334, -0.5141, -2.1202, 1.3521, -0.9393, 0.5085,
-0.4709, 0.8198, -1.1218, 0.0697],
[-0.3674, -0.0277, -0.2347, -0.0821, -0.0448, -0.1773, 0.2866, -0.1386,
-0.4271, 0.4375, -0.2847, -0.1146, -0.4262, -0.3571, -0.0425, -0.1147,
-0.4207, -0.4552, -0.5277, -0.9584, -0.4177, -0.7396, -0.2967, 0.5085,
0.4546, -1.2198, -0.3522, -1.2640],
[-0.3674, -0.1447, -0.1991, -0.0821, 0.1701, -0.1773, 0.0430, 0.1324,
-0.4271, 0.7299, -0.4561, 0.2915, -0.5657, -0.1855, -0.2123, -0.1147,
-0.0413, -0.8311, -0.6396, -1.0451, -0.4177, -0.7396, -0.2967, -0.4028,
0.7631, -1.2198, -0.3522, -1.2640]])
When I train the model as follows:
Epochs = 10
batch_size = 32
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)

for epoch in range(Epochs):
    print(f"Epoch {epoch + 1}")
    for n, (X, y) in enumerate(batches):
        model.train()
        optimizer.zero_grad()
        y_pred = model(X)
        loss = criterion(y_pred, y)
        loss.backward()
        optimizer.step()
    model.eval()
    accurate = 0
    for X_instance, y_instance in zip(test_X, test_y):
        if y_instance == round(model(X_instance.view(-1, 6, 28)).detach().item()):
            accurate += 1
    print(f"Accuracy test set: {accurate / len(test_X)}")
The accuracy fails to converge:
Epoch 1
Accuracy test set: 0.23169107856191745
Sample params:
tensor([-0.3356, -0.0105, -0.3405, -0.0049, 0.0037, 0.1707, 0.2685, -0.3893,
-0.4707, -0.2872, -0.1544, -0.1455, 0.0393, 0.0774, -0.4194, 0.0780,
-0.2177, -0.3829, -0.4679, 0.0370, -0.0794, 0.0455, -0.1331, -0.0169,
-0.1551, -0.0348, 0.1746, -0.5163], grad_fn=<SelectBackward>)
tensor([ 0.2137, -0.2558, 0.1509, -0.0975, 0.5591, 0.0907, -0.1249, 0.3095,
0.2112, 0.3134, -0.1581, -0.3051, -0.3559, -0.0177, 0.1485, 0.4397,
-0.1441, 0.1705, 0.3230, -0.3236, 0.0692, 0.0920, -0.2691, -0.3695,
-0.0692, 0.3747, 0.0149, 0.5216], grad_fn=<SelectBackward>)
Epoch 2
Accuracy test set: 0.23049267643142476
Sample params:
tensor([-0.3483, -0.0144, -0.3512, 0.0213, -0.0081, 0.1777, 0.2674, -0.4031,
-0.4628, -0.3041, -0.1651, -0.1511, 0.0216, 0.0513, -0.4320, 0.0839,
-0.2602, -0.3629, -0.4541, 0.0398, -0.0768, 0.0432, -0.1150, -0.0160,
-0.1346, -0.0727, 0.1801, -0.5253], grad_fn=<SelectBackward>)
tensor([ 0.1879, -0.2534, 0.1461, -0.1141, 0.5735, 0.0872, -0.1286, 0.3273,
0.2084, 0.3037, -0.1535, -0.2934, -0.3870, -0.0252, 0.1492, 0.4752,
-0.1709, 0.1776, 0.3390, -0.3318, 0.0734, 0.1077, -0.2790, -0.3777,
-0.0518, 0.3726, 0.0228, 0.5404], grad_fn=<SelectBackward>)
Epoch 3
Accuracy test set: 0.22982689747003995
Sample params:
tensor([-0.3725, -0.0069, -0.3623, 0.0393, -0.0167, 0.1748, 0.2577, -0.4183,
-0.4681, -0.3196, -0.1657, -0.1613, 0.0122, 0.0268, -0.4361, 0.0838,
-0.2962, -0.3566, -0.4344, 0.0366, -0.0822, 0.0486, -0.1150, -0.0295,
-0.1080, -0.1094, 0.1841, -0.5336], grad_fn=<SelectBackward>)
tensor([ 0.1664, -0.2456, 0.1477, -0.1332, 0.5820, 0.0819, -0.1228, 0.3426,
0.2066, 0.2985, -0.1464, -0.2824, -0.4199, -0.0323, 0.1530, 0.5057,
-0.1991, 0.1856, 0.3407, -0.3347, 0.0800, 0.1203, -0.2791, -0.3863,
-0.0426, 0.3760, 0.0327, 0.5641], grad_fn=<SelectBackward>)
Epoch 4
Accuracy test set: 0.23249001331557922
Sample params:
tensor([-0.3945, 0.0032, -0.3765, 0.0600, -0.0248, 0.1713, 0.2442, -0.4297,
-0.4741, -0.3311, -0.1653, -0.1667, 0.0029, 0.0066, -0.4373, 0.0738,
-0.3320, -0.3530, -0.4136, 0.0390, -0.0731, 0.0552, -0.1117, -0.0517,
-0.0871, -0.1455, 0.1841, -0.5359], grad_fn=<SelectBackward>)
tensor([ 0.1495, -0.2292, 0.1524, -0.1473, 0.5938, 0.0661, -0.1157, 0.3626,
0.2013, 0.2927, -0.1350, -0.2661, -0.4558, -0.0411, 0.1562, 0.5381,
-0.2279, 0.1927, 0.3319, -0.3431, 0.0852, 0.1402, -0.2747, -0.4026,
-0.0297, 0.3757, 0.0396, 0.5856], grad_fn=<SelectBackward>)
Did I make a mistake in the model definition?
Answer (score: 2):
So, generally speaking, 6 layers in an LSTM can accomplish a lot. Your input dimension is 28 (are you training on MNIST, or are the inputs letters?), so 10 is quite small for a hidden dimension. Try the following parameters:
hidden_dim = 128 to 512
layer_dim = 2 to max. 4
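For example, the model could be re-instantiated as follows (a sketch; the specific values are arbitrary picks from the suggested ranges):

# Hypothetical values chosen from the ranges above
model = LSTMModel(28, 256, 2, 1)  # hidden_dim=256, layer_dim=2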
I also see that your output shape is 1 and that you are not using an activation function. Are you trying to predict integers (e.g., 1 for the class "dog", 2 for "cat")? If so, you should switch to one-hot encoding, so that your output shape equals the number of classes you want to predict, and then use softmax as the activation of the last layer.
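A minimal sketch of that change, assuming a hypothetical num_classes and integer class labels. Note that PyTorch's nn.CrossEntropyLoss takes raw logits and integer class indices (it applies log-softmax internally), which is the common idiom in place of explicit one-hot targets; the explicit softmax is then only needed at inference time:

num_classes = 3  # hypothetical number of classes

# One output logit per class instead of a single regression value
model = LSTMModel(28, 256, 2, num_classes)
criterion = nn.CrossEntropyLoss()  # expects raw logits and integer class labels

logits = model(torch.randn(16, 6, 28))  # shape: [16, num_classes]
loss = criterion(logits, torch.randint(0, num_classes, (16,)))

# At inference time, softmax turns logits into class probabilities
probs = torch.softmax(logits, dim=1)
predicted = probs.argmax(dim=1)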