我在PyTorch中有以下用于文本分类的模型:
EMBEDDING_DIM = 32
NUM_CATS = 58
MAX_VOCAB_SIZE = 10000
SENTENCE_LENGTH = 32
class BOWTextClassifier(nn.Module):
def __init__(self, vocab_size = MAX_VOCAB_SIZE, sentence_length = SENTENCE_LENGTH, embedding_dim = EMBEDDING_DIM, num_categories = NUM_CATS):
super(BOWTextClassifier, self).__init__()
self.embeddings = nn.Embedding(vocab_size, embedding_dim)
self.linear1 = nn.Linear(sentence_length * embedding_dim, 128)
self.linear2 = nn.Linear(128, num_categories)
def forward(self, inputs):
embeds = self.embeddings(inputs).view((1, -1))
out = F.relu(self.linear1(embeds))
out = self.linear2(out)
log_probs = F.log_softmax(out, dim=1)
return log_probs
此外,我编写了以下Dataset类,该类提供给Dataloader来生成大小为4的批处理。
class Dataset(data.Dataset):
def __init__(self, texts, labels):
self.labels = labels
self.texts = texts
def __len__(self):
return len(self.labels)
def __getitem__(self, index):
# Load data and get label
X = torch.tensor(self.texts[index])
y = torch.tensor(self.labels[index])
return X, y
training_set = Dataset(texts, cats)
training_generator = data.DataLoader(training_set, batch_size = 4, shuffle = False)
但是,当我尝试使用以下代码训练模型时,出现size mismatch, m1: [1 x 4096], m2: [1024 x 128]
错误。
loss_function = nn.NLLLoss()
model = BOWTextClassifier()
optimizer = optim.SGD(model.parameters(), lr=0.001)
for epoch in range(5):
for batch_x, batch_y in training_generator:
optimizer.zero_grad()
log_probs = model(batch_x)
loss = loss_function(log_probs, batch_y)
loss.backward()
optimizer.step()
print(loss)
我的texts
列表包含500个32个数字的列表,而我的cats
列表包含500个数字。
我的直觉是,嵌入层以某种方式使输入批次变平,导致输出的大小为4096,而不是1024。但是,我不知道如何解决此问题。