数据集为CIFAR10。我创建了一个类似于VGG的网络:
class FirstModel(nn.Module):
def __init__(self):
super(FirstModel, self).__init__()
self.vgg1 = nn.Sequential(
nn.Conv2d(3, 16, 3, padding=1),
nn.BatchNorm2d(16),
nn.ReLU(),
nn.Conv2d(16, 16, 3, padding=1),
nn.BatchNorm2d(16),
nn.ReLU(),
nn.MaxPool2d(2,2),
nn.Dropout(0.2)
)
self.vgg2 = nn.Sequential(
nn.Conv2d(16, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.Conv2d(32, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2,2),
nn.Dropout(0.2)
)
self.vgg3 = nn.Sequential(
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.Conv2d(64, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2,2),
nn.Dropout(0.2)
)
self.fc1 = nn.Linear(4 * 4 * 64, 4096)
self.relu = nn.ReLU()
self.fc2 = nn.Linear(4096, 4096)
self.fc3 = nn.Linear(4096, 10)
self.softmax = nn.Softmax()
self.dropout = nn.Dropout(0.5)
def forward(self, x):
x = self.vgg3(self.vgg2(self.vgg1(x)))
x = nn.Flatten()(x)
x = self.relu(self.fc1(x))
x = self.dropout(x)
x = self.relu(self.fc2(x))
x = self.dropout(x)
x = self.softmax(self.fc3(x))
return x
然后我训练它并可视化损失和准确性:
import matplotlib.pyplot as plt
from IPython.display import clear_output
def plot_history(train_history, val_history, title='loss'):
plt.figure()
plt.title('{}'.format(title))
plt.plot(train_history, label='train', zorder=1)
points = np.array(val_history)
steps = list(range(0, len(train_history) + 1, int(len(train_history) / len(val_history))))[1:]
plt.scatter(steps, val_history, marker='*', s=180, c='red', label='val', zorder=2)
plt.xlabel('train steps')
plt.legend(loc='best')
plt.grid()
plt.show()
def train_model(model, optimizer, train_dataloader, test_dataloader):
criterion = nn.CrossEntropyLoss()
train_loss_log = []
train_acc_log = []
val_loss_log = []
val_acc_log = []
for epoch in range(NUM_EPOCH):
model.train()
train_loss = 0.
train_size = 0
train_acc = 0.
for inputs, labels in train_dataloader:
inputs, labels = inputs.to(device), labels.to(device)
optimizer.zero_grad()
y_pred = model(inputs)
loss = criterion(y_pred, labels)
loss.backward()
optimizer.step()
train_loss += loss.item()
train_size += y_pred.size(0)
train_loss_log.append(loss.data / y_pred.size(0))
_, pred_classes = torch.max(y_pred, 1)
train_acc += (pred_classes == labels).sum().item()
train_acc_log.append(np.mean((pred_classes == labels).cpu().numpy()))
# блок validation
val_loss = 0.
val_size = 0
val_acc = 0.
model.eval()
with torch.no_grad():
for inputs, labels in test_dataloader:
inputs, labels = inputs.to(device), labels.to(device)
y_pred = model(inputs)
loss = criterion(y_pred, labels)
val_loss += loss.item()
val_size += y_pred.size(0)
_, pred_classes = torch.max(y_pred, 1)
val_acc += (pred_classes == labels).sum().item()
val_loss_log.append(val_loss/val_size)
val_acc_log.append(val_acc/val_size)
clear_output()
plot_history(train_loss_log, val_loss_log, 'loss')
plot_history(train_acc_log, val_acc_log, 'accuracy')
print('Train loss:', train_loss / train_size)
print('Train acc:', train_acc / train_size)
print('Val loss:', val_loss / val_size)
print('Val acc:', val_acc / val_size)
然后我训练模型:
first_model = FirstModel()
first_model.to(device)
optimizer = optim.RMSprop(first_model.parameters(), lr=0.001, momentum=0.9)
train_model(first_model_rms, optimizer, train_dataloader, test_dataloader)
损耗和精度不变(精度为0.1)。但是,如果优化器是带有动量的SGD,则一切正常(损耗和精度更改)。我已经尝试过更改动量和lr,但这无济于事。
应该解决什么问题?将不胜感激任何可能的建议!
答案 0 :(得分:1)
尝试进一步降低学习率.....如果这样对准确性和损失也没有影响,则将优化器更改为adams或其他内容,并以不同的学习率进行游戏。
答案 1 :(得分:1)
因此,首先,您不必像nn.CrossEntropyLoss
那样在“模型”中使用softmax,而且我也认为RMSprop不能与动量一起使用。