我目前正在使用pytorch编写第一个程序。关于网络的输出真的很困惑。我正在输入250个批次,输出为3个类。因此,据我所知,使用CrossEntropyLoss的输出需要具有([250,3])的张量形状,但是在应用softmax之后,我得到的是[[8250,3]),试图编辑形状也会引发错误,因为显然(8250,3)无法重塑为(250,3),不确定批次如何乘以33x?希望能得到一些简单的解释帮助。
谢谢! :)
vocabularySize = len(wordToNoDict) + 1
output = 3
embedding = 400
hiddenDimension = 512
layers = 2
classifierModel = Classifier.HateSpeechDetector(device, vocabularySize, output, embedding, hiddenDimension, layers)
classifierModel.to(device)
trainClassifier(classifierModel, trainingLoad, validationLoad, device, batchSize)
path = './state_dict.pt'
weight = torch.tensor([15389/3407, 15389/15389, 15389/800])
criterion = nn.CrossEntropyLoss(weight=weight)
#test(classifierModel, path, testingLoad, batchSize, device, criterion)
def trainClassifier(model, trainingData, validationData, device, batchSize):
weight = torch.tensor([1.2, 1.0, 1.8])
epochs = 2
counter = 0
testWithValiEvery = 10
clip = 5
valid_loss_min = np.Inf
lr=0.005
weight = torch.tensor([15389/3407, 15389/15389, 15389/800])
criterion = nn.CrossEntropyLoss(weight=weight)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
model.train()
for i in range(epochs):
h = model.init_hidden(batchSize, device)
for inputs, labels in trainingData:
h = tuple([e.data for e in h])
inputs, labels = inputs.to(device), labels.to(device)
model.zero_grad()
output, h = model(inputs, h)
loss = criterion(output.squeeze(), labels.float())
loss.backward()
nn.utils.clip_grad_norm_(model.parameters(), clip)
optimizer.step()
counter += 1
print(counter)
if counter%testWithValiEvery == 0:
print("validating")
val_h = model.init_hidden(batchSize, device)
val_losses = []
model.eval()
for inp, lab in validationData:
val_h = tuple([each.data for each in val_h])
inp, lab = inp.to(device), lab.to(device)
out, val_h = model(inp, val_h)
val_loss = criterion(out.squeeze(), lab.float())
val_losses.append(val_loss.item())
model.train()
print("Epoch: {}/{}...".format(i+1, epochs),
"Step: {}...".format(counter),
"Loss: {:.6f}...".format(loss.item()),
"Val Loss: {:.6f}".format(np.mean(val_losses)))
if np.mean(val_losses) <= valid_loss_min:
torch.save(model.state_dict(), './state_dict.pt')
print('Validation loss decreased ({:.6f} --> {:.6f}). Saving model ...'.format(valid_loss_min,np.mean(val_losses)))
valid_loss_min = np.mean(val_losses)
class HateSpeechDetector(nn.Module):
def __init__(self, device, vocabularySize, output, embedding, hidden, layers, dropProb=0.5):
super(HateSpeechDetector, self).__init__()
#Number of outputs (Classes/Categories)
self.output = output
#Number of layers in the LSTM
self.numLayers = layers
#Number of hidden neurons in each LSTM layer
self.hiddenDimensions = hidden
#Device being used for by model (CPU or GPU)
self.device = device
#Embedding layer finds correlations in words by converting word integers into vectors
self.embedding = nn.Embedding(vocabularySize, embedding)
#LSTM stores important data in memory, using it to help with future predictions
self.lstm = nn.LSTM(embedding,hidden,layers,dropout=dropProb,batch_first=True)
#Dropout is used to randomly drop nodes. This helps to prevent overfitting of the model during training
self.dropout = nn.Dropout(dropProb)
#Establishing 4 simple layers and a sigmoid output
self.fc = nn.Linear(hidden, hidden)
self.fc2 = nn.Linear(hidden, hidden)
self.fc3 = nn.Linear(hidden, hidden)
self.fc4 = nn.Linear(hidden, hidden)
self.fc5 = nn.Linear(hidden, hidden)
self.fc6 = nn.Linear(hidden, output)
self.softmax = nn.Softmax(dim=1)
def forward(self, x, hidden):
batchSize = x.size(0)
print(batchSize)
x = x.long()
print(x)
embeds = self.embedding(x)
print(embeds.shape)
lstm_out, hidden = self.lstm(embeds, hidden)
print(lstm_out.shape)
lstm_out = lstm_out.contiguous().view(-1,self.hiddenDimensions)
print(lstm_out.shape)
out = self.dropout(lstm_out)
out = self.fc(out)
out = self.fc2(out)
out = self.fc3(out)
out = self.fc4(out)
out = self.fc5(out)
out = self.fc6(out)
out = self.softmax(out)
print(out.shape)
out = out.view(batchSize, 3)
return out, hidden
def init_hidden(self, batchSize, device):
weight = next(self.parameters()).data
print(weight)
hidden = (weight.new(self.numLayers, batchSize, self.hiddenDimensions).zero_().to(device), weight.new(self.numLayers, batchSize, self.hiddenDimensions).zero_().to(device))
print(hidden)
return hidden