I am implementing an Inception-like CNN in PyTorch. After the block of convolutional layers, I have three fully connected linear layers followed by a sigmoid activation that produces the final regression output. I am testing the effect of dropout layers in this network, and it is giving me some unexpected results.
Here is the code:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from torch.optim import Adam

# InceptionBlock, device, train_loader, test_loader and the learning-rate scheduler are defined elsewhere.

class MyInception(nn.Module):
    def __init__(self, in_channels, verbose=False):
        super(MyInception, self).__init__()
        self.v = verbose
        ic = in_channels; oc = 16
        self.inceptionBlock1 = InceptionBlock(in_channels=ic, out_channels=oc, maxpool=False, verbose=verbose)
        self.inceptionBlock2 = InceptionBlock(in_channels=oc * 6, out_channels=oc, maxpool=False, verbose=verbose)
        self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.regressor = nn.Sequential(
            nn.Linear(oc * 6 * 35 * 35, 1024, bias=True),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.2, inplace=False),  # <--- Dropout 1
            nn.Linear(1024, 128, bias=True),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.2, inplace=False),  # <--- Dropout 2
            nn.Linear(128, 1, bias=True),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.inceptionBlock1(x)
        x = self.inceptionBlock2(x)
        x = self.pool(x)
        x = torch.flatten(x, 1)
        x = self.regressor(x)
        return x
def train(epochs=10, dot_every=25):
    running = pd.DataFrame(columns=['Epoch', 'Round', 'TrainLoss', 'TestLoss', 'LearningRate'])
    for epoch in range(epochs):
        train_losses = []
        model.train()
        counter = 0
        for images, targets in train_loader:
            images = images.to(device)
            targets = targets.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = loss_fn(torch.flatten(outputs), targets)
            train_losses.append(loss.item())
            loss.backward()
            optimizer.step()
            counter += 1
            if counter % dot_every == 0:
                print(".", end='.', flush=True)
                test_loss = test()
            else:
                test_loss = -1.
            lr = np.squeeze(scheduler.get_lr())
            running = running.append(pd.Series([epoch, counter, loss.item(), test_loss, lr], index=running.columns), ignore_index=True)
        test_loss = test()
        train_loss = np.mean(np.asarray(train_losses))
        running = running.append(pd.Series([epoch, counter, train_loss, test_loss, lr], index=running.columns), ignore_index=True)
        print("")
        print(f"Epoch {epoch+1}, Train Loss: {np.round(train_loss,4)}, Test Loss: {np.round(test_loss, 4)}, Learning Rate: {np.format_float_scientific(lr, precision=4)}")
    return running
def test():
    model.eval()
    test_losses = []
    for i, (images, targets) in enumerate(test_loader):
        images = images.to(device)
        targets = targets.to(device)
        outputs = model(images)
        loss = loss_fn(torch.flatten(outputs), targets)
        test_losses.append(loss.item())
    mean_loss = np.mean(np.asarray(test_losses))
    return mean_loss
# instantiate the model
model = MyInception(in_channels=4, verbose=False).to(device)
# define the optimizer and loss function
optimizer = Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
loss_fn = nn.MSELoss()
# run it
results = train(epochs=10, dot_every=20)
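One thing to note about the snippet: train() reads the learning rate from a scheduler that is never defined in the code shown. Purely as an assumption for completeness (this is not the asker's actual setup), it could look something like:

# Hypothetical scheduler setup -- the real one is not shown in the question.
from torch.optim.lr_scheduler import StepLR
scheduler = StepLR(optimizer, step_size=1, gamma=0.9)  # multiply the LR by 0.9 once per epoch
# scheduler.step() would also need to be called once per epoch for the LR to actually change.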
Below is a plot of the MSE loss on the training data (red = no dropout, green = second dropout only, blue = first dropout only, purple = both dropouts). The runs with dropout show a large increase in loss at the epoch boundaries (vertical dotted lines), and the run with both dropouts even shows a big jump in loss at the start of epoch 10.
What really matters is the test loss. After epoch 5 it is much more stable and differs little between the cases, so maybe I shouldn't care. But I would like to understand what is going on.
Answer 0 (score: 0):
I cracked the case. I realized that test() switches the model from model.train() to model.eval(), and I never set it back to train() afterwards. Because Dropout behaves differently in training and evaluation mode, adding Dropout is what exposed the bug.
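Since model.train() is only called at the start of each epoch, dropout was effectively disabled for the rest of each epoch after the first test() call, which also matches the loss jumps at the epoch boundaries when dropout switches back on. A minimal sketch of the fix, keeping the rest of the code unchanged, is to restore training mode at the end of test(); wrapping the evaluation pass in torch.no_grad() is an extra refinement that is not in the original code:

def test():
    model.eval()  # dropout switches to evaluation behavior
    test_losses = []
    with torch.no_grad():  # no gradients are needed while evaluating
        for images, targets in test_loader:
            images = images.to(device)
            targets = targets.to(device)
            outputs = model(images)
            loss = loss_fn(torch.flatten(outputs), targets)
            test_losses.append(loss.item())
    model.train()  # restore training mode so dropout is active for subsequent training batches
    return np.mean(np.asarray(test_losses))

In evaluation mode nn.Dropout is an identity (no-op), while in training mode it zeroes each element with probability p and scales the rest by 1/(1-p), so forgetting to switch back silently turns dropout off for the remaining batches of the epoch.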