My goal is to train a Siamese neural network on pairs of similar or dissimilar objects. Each object is described by a fixed number of binary features, each indicating whether the corresponding property is present. I split the dataset 60:20:20 (train:validation:test) and pair objects at random. To prevent the system from overfitting, I generate additional random pairs from the objects in the training set, ending up with 100,000 training instances (50% similar, 50% dissimilar). The validation set holds roughly 1,500 instances (also balanced). A rough sketch of the pairing step is shown immediately below; the Siamese network architecture follows it.
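The balanced pairing works roughly like this (a minimal sketch, not my exact code; generate_balanced_pairs is an illustrative name and is_similar stands in for whatever criterion defines similarity between two feature vectors):

import random

def generate_balanced_pairs(objects, is_similar, n_pairs):
    # Rejection-sample random pairs until we have n_pairs of them,
    # half labelled 1.0 (similar) and half labelled 0.0 (dissimilar).
    pos, neg = [], []
    while len(pos) < n_pairs // 2 or len(neg) < n_pairs // 2:
        a, b = random.sample(objects, 2)
        if is_similar(a, b):
            if len(pos) < n_pairs // 2:
                pos.append((a, b, 1.0))
        elif len(neg) < n_pairs // 2:
            neg.append((a, b, 0.0))
    pairs = pos + neg
    random.shuffle(pairs)  # mix similar and dissimilar pairs
    return pairs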
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class SNN(nn.Module):
    def __init__(self, input_dim, output_dim, hidden_dim, linear_dropout,
                 dist_fn, learning_rate, binary, weight_decay):
        super(SNN, self).__init__()
        # Save model configs
        self.dist_fn = dist_fn
        self.binary = binary
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # Shared encoder applied to both members of a pair
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.Dropout(linear_dropout),  # was hard-coded to 0.4, ignoring the argument
            nn.LeakyReLU(),
            nn.Linear(hidden_dim, output_dim)).to(self.device)  # was the undefined name drug_embed_dim
        # Learned projection used by the l1/l2 distance variants
        self.dist_fc = nn.Linear(output_dim, 1).to(self.device)
        # Get params and register optimizer
        info, params = self.get_model_params()
        self.optimiser = optim.Adam(params, lr=learning_rate,
                                    weight_decay=weight_decay)
        # Hand-rolled binary cross-entropy; clamping keeps log() away from zero
        self.criterion = lambda x, y: (y * torch.log(x.clamp(min=1e-8)) +
                                       (1 - y) * torch.log((1 - x).clamp(min=1e-8)))

    def siamese_basic(self, inputs):
        return self.encoder(inputs.float())

    def distance_layer(self, vec1, vec2, distance='cos'):
        if distance == 'cos':
            similarity = F.cosine_similarity(
                vec1 + 1e-16, vec2 + 1e-16, dim=-1)
        elif distance == 'l1':
            similarity = self.dist_fc(torch.abs(vec1 - vec2))
            similarity = similarity.squeeze(1)
        elif distance == 'l2':
            similarity = self.dist_fc(torch.abs(vec1 - vec2) ** 2)
            similarity = similarity.squeeze(1)
        if self.binary:
            similarity = torch.sigmoid(similarity)  # F.sigmoid is deprecated
        return similarity

    def forward(self, key1, key2, targets, predict=False):
        output1 = self.siamese_basic(key1)
        output2 = self.siamese_basic(key2)
        similarity = self.distance_layer(output1, output2, self.dist_fn)
        if predict:
            similarity_rounded = torch.round(similarity)
            accuracy = torch.sum(torch.eq(similarity_rounded, targets)).item()
            accuracy = accuracy / similarity_rounded.size(0) * 100
            return accuracy, similarity, output1, output2
        else:
            return similarity, output1, output2

    def get_loss(self, outputs, targets):
        loss = self.criterion(outputs, targets)
        loss = -torch.sum(loss)
        return loss

    def get_model_params(self):
        params = []
        total_size = 0

        def multiply_iter(p_list):
            out = 1
            for p in p_list:
                out *= p
            return out

        for p in self.parameters():
            if p.requires_grad:
                params.append(p)
                total_size += multiply_iter(p.size())
        return '{}\nparam size: {:,}\n'.format(self, total_size), params
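For reference, I instantiate the model roughly like this (a sketch: the embedding size, hidden size, and weight decay shown here are placeholders rather than my exact values, and n_features stands for the number of binary features per object; binary=True squashes the similarity through a sigmoid before the cross-entropy):

model = SNN(input_dim=n_features, output_dim=64, hidden_dim=128,
            linear_dropout=0.4, dist_fn='cos', learning_rate=0.0001,
            binary=True, weight_decay=0.0)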
I am training the SNN with the following function:
from tqdm import tqdm

def run_bi(model, epochs, loader, val_loader, args, metric, N, k, train=False, verbose=True):
    episode = 0
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(model.optimiser,
                                                           patience=10, factor=0.1)
    for e in range(epochs):
        for inputs1, inputs2, labels in tqdm(loader):
            # on_gpu was undefined here; move the batch to the model's device instead
            inputs1, inputs2, labels = (inputs1.to(model.device),
                                        inputs2.to(model.device),
                                        labels.to(model.device))
            # Grad zero + mode change
            model.optimiser.zero_grad()
            if train:
                model.train()
            else:
                model.eval()
            # Get outputs
            accuracy_train, similarity_train, output1, output2 = model(
                inputs1, inputs2, labels, predict=True)
            loss_train = model.get_loss(similarity_train, labels)
            # Optimize model
            if train:
                loss_train.backward()
                # clip_grad_norm is deprecated in favour of the in-place clip_grad_norm_
                nn.utils.clip_grad_norm_(model.get_model_params()[1], 5)
                model.optimiser.step()
            episode += 1
            # Estimate metrics every 100 episodes
            if episode % 100 == 0:
                model.eval()  # disable dropout during validation
                val_losses = []
                with torch.no_grad():  # no gradients needed for validation
                    for inputs1_val, inputs2_val, labels_val in tqdm(val_loader):
                        inputs1_val, inputs2_val, labels_val = (
                            inputs1_val.to(model.device),
                            inputs2_val.to(model.device),
                            labels_val.to(model.device))
                        accuracy_val, similarity_val, output1_val, output2_val = model(
                            inputs1_val, inputs2_val, labels_val, predict=True)
                        loss_val = model.get_loss(similarity_val, labels_val)
                        val_losses.append(loss_val.item())
                # loss_epoch_train was undefined here; ReduceLROnPlateau is
                # normally stepped on the validation loss
                scheduler.step(sum(val_losses) / len(val_losses))
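For completeness, the loaders and the training call look roughly like this (a sketch: the tensor names and the values for epochs/args/metric/N/k are placeholders, since those arguments are not used inside the excerpt above):

from torch.utils.data import TensorDataset, DataLoader

# x1_*/x2_* hold the paired feature vectors, y_* the 0/1 similarity targets
train_loader = DataLoader(TensorDataset(x1_train, x2_train, y_train),
                          batch_size=25, shuffle=True)
val_loader = DataLoader(TensorDataset(x1_val, x2_val, y_val), batch_size=25)

run_bi(model, epochs=20, loader=train_loader, val_loader=val_loader,
       args=None, metric=None, N=None, k=None, train=True)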
For training I use very small batches (batch_size = 25), dropout (as shown in self.encoder), and the augmented dataset described above, all to prevent overfitting. My learning rate is also very low (lr = 0.0001), and I use cosine as the distance function. Nevertheless, the model already seems to overfit within the first epoch, as the example below shows:

[plot of the training and validation curves omitted]
I would be grateful for guidance on the following:

1) Why doesn't the learning generalise? I believe I have implemented most of the standard strategies to avoid overfitting.
2) Why do such sudden bumps appear during training?

And, of course, on any other relevant problems you may spot. Thank you!
EDIT: I changed the accuracy computation, since it was really only counting the number of correct predictions within a batch rather than an actual metric over the data. The trend observed in the plot has not changed, though.
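The corrected computation accumulates over the whole loader instead of a single batch, along these lines (a sketch of the idea, not my exact code):

correct, total = 0, 0
model.eval()
with torch.no_grad():
    for inputs1, inputs2, labels in val_loader:
        inputs1, inputs2, labels = (inputs1.to(model.device),
                                    inputs2.to(model.device),
                                    labels.to(model.device))
        _, similarity, _, _ = model(inputs1, inputs2, labels, predict=True)
        correct += (torch.round(similarity) == labels).sum().item()
        total += labels.size(0)
accuracy = 100.0 * correct / total  # dataset-level accuracy, not per-batch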