I want to build a BiLSTM + CRF model in PyTorch to solve an NER problem. However, the emission matrix comes out identical in every epoch.
I modified the official PyTorch tutorial code: in the BiLSTM layer I pack each batch and feed it into the LSTM, but the output emission matrix never changes. I don't understand why the BiLSTM layer has no effect. Please help me, thank you.
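For context, the model is driven by a standard training loop along these lines (a sketch only; the optimizer, learning rate, and batching variables below are placeholders, not my exact settings):

import torch.optim as optim

# placeholder loop: `batches` yields length-sorted sentence lists and tag tensors
model = BILSTM_CRF(batch_size, vocab_size, tag_to_idx, embed_dim, hidden_dim, word_to_idx)
optimizer = optim.SGD(model.parameters(), lr=0.01)   # placeholder optimizer/lr
for epoch in range(num_epochs):
    for sents, tags in batches:
        model.zero_grad()
        loss, feats = model.neg_log_likelihood(sents, tags)
        loss.backward()
        optimizer.step()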
Here is my code.
import torch
import torch.nn as nn
import utils
# 1. First, test with the embedding
'''random.seed(1)
np.random.seed(1)
torch.manual_seed(1)'''
class BILSTM_CRF(nn.Module):
    def __init__(self, batch_size, vocab_size, tag_to_idx, embed_dim, hidden_dim, word_to_idx):
        super(BILSTM_CRF, self).__init__()
        self.batch_size = batch_size
        self.embed_dim = embed_dim
        self.hidden_dim = hidden_dim
        self.tag_to_idx = tag_to_idx
        self.vocab_size = vocab_size
        self.target_size = len(tag_to_idx)  # includes <start> and <end>
        self.word_to_idx = word_to_idx
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.lstm = nn.LSTM(embed_dim, hidden_dim // 2,
                            bidirectional=True,
                            batch_first=True)
        # emission layer
        self.emission = nn.Linear(hidden_dim, self.target_size)
        # CRF part: parameter initialization
        # note: transitions[i][j] is the score of transitioning from tag j to tag i
        self.transitions = nn.Parameter(torch.randn(self.target_size, self.target_size))
        # no tag ever transitions to <start>, and <end> never transitions to
        # any tag, so these two transition scores are set very small
        self.transitions.data[tag_to_idx["<start>"], :] = -10000
        self.transitions.data[:, tag_to_idx["<end>"]] = -10000
    # obtain the emission scores from the BiLSTM layer
    def _get_lstm_features(self, sents):
        lengths = utils.get_lengths(sents)  # lengths of the input sequences
        # modified from the official tutorial: pack the whole batch and
        # run it through the LSTM in one call
        input_tensor = utils.pad_sents(sents, self.word_to_idx)  # pad and index-encode sents
        emb = self.embedding(input_tensor)
        # pack (the initial hidden/cell states default to zeros when omitted)
        input_packed = torch.nn.utils.rnn.pack_padded_sequence(emb, lengths,
                                                               batch_first=True)
        output_tensor, _ = self.lstm(input_packed)
        # unpack
        output_tensor, _ = torch.nn.utils.rnn.pad_packed_sequence(output_tensor,
                                                                  batch_first=True)
        emission = self.emission(output_tensor)  # size [batch, length, tag_size]
        return emission
    def neg_log_likelihood(self, sents, tags):  # loss function
        # sents: [batch, length]  tags: [batch, length]
        feats = self._get_lstm_features(sents)  # size [batch, length, tag_size]
        # !!! the output here is the same in every epoch
        print("feats", feats)
        forward_score = self._forward_alg(feats)        # size [batch, 1]
        gold_score = self._score_sentence(feats, tags)  # size [batch, 1]
        losses = forward_score - gold_score             # size [batch, 1]
        loss = torch.sum(losses)
        return loss, feats
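The packing step in _get_lstm_features is the part I changed from the tutorial. As a standalone illustration of what pack_padded_sequence / pad_packed_sequence do with a length-sorted batch (toy sizes, not my real data):

import torch
import torch.nn as nn

emb = torch.randn(2, 3, 4)   # toy batch: 2 sequences, max length 3, embed dim 4
lengths = [3, 1]             # must be sorted descending (or pass enforce_sorted=False)
packed = nn.utils.rnn.pack_padded_sequence(emb, lengths, batch_first=True)
lstm = nn.LSTM(4, 2, bidirectional=True, batch_first=True)
out_packed, _ = lstm(packed)
out, out_lengths = nn.utils.rnn.pad_packed_sequence(out_packed, batch_first=True)
print(out.shape)        # torch.Size([2, 3, 4]): hidden 2 per direction * 2 directions
print(out_lengths)      # tensor([3, 1]); positions past each length are zero-padded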
utils is a small module of helper functions, shown below.
# a few small helper functions
import torch

# collect the length of each sequence
def get_lengths(sents):
    lengths = []
    for sent in sents:
        lengths.append(len(sent))
    return lengths
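# Example (illustrative input): get_lengths([["EU", "rejects", "it"], ["EU"]]) -> [3, 1]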
# pad the sequences to equal length
# the incoming sents are already sorted by length (longest first)
# PAD fills the padded positions
# words missing from the dictionary default to <unk>
def pad_sents(sents, word_map):
    UNK = word_map.get('<unk>')
    PAD = word_map.get('<pad>')
    max_len = len(sents[0])
    # index tensors must not require grad, so build a plain long tensor
    batch_tensor = torch.ones(len(sents), max_len, dtype=torch.long) * PAD
    for i, l in enumerate(sents):
        for j, e in enumerate(l):
            batch_tensor[i][j] = word_map.get(e, UNK)
    return batch_tensor
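# Illustration (toy word_map, not my real vocabulary):
#   word_map = {'<pad>': 0, '<unk>': 1, 'EU': 2, 'rejects': 3}
#   pad_sents([['EU', 'rejects'], ['EU']], word_map) -> tensor([[2, 3], [2, 0]])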
# sort the sentences by length (longest first), keeping tags aligned
def sort_pairs(sents_list, tag_list):
    pairs = list(zip(sents_list, tag_list))
    indices = sorted(range(len(pairs)),
                     key=lambda k: len(pairs[k][0]),
                     reverse=True)
    pairs = [pairs[i] for i in indices]
    sents_list, tag_list = list(zip(*pairs))
    return sents_list, tag_list
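A quick way to check whether the emission features move at all is to run two optimizer steps on one toy batch and compare feats. The tag set, vocabulary, and gold tags below are made up for illustration, and this assumes the full class (with _forward_alg and _score_sentence, which I have not shown here):

import torch
import torch.optim as optim

tag_to_idx = {"<start>": 0, "B": 1, "I": 2, "O": 3, "<end>": 4}  # toy tag set
word_to_idx = {"<pad>": 0, "<unk>": 1, "EU": 2, "rejects": 3}    # toy vocabulary
sents = [["EU", "rejects"], ["EU"]]                              # already sorted by length
tags = torch.tensor([[1, 3], [1, 3]])                            # toy gold tags (padded)

model = BILSTM_CRF(batch_size=2, vocab_size=4, tag_to_idx=tag_to_idx,
                   embed_dim=8, hidden_dim=8, word_to_idx=word_to_idx)
optimizer = optim.SGD(model.parameters(), lr=0.1)
for step in range(2):
    model.zero_grad()
    loss, feats = model.neg_log_likelihood(sents, tags)
    print(step, feats.abs().sum().item())  # should differ between the two steps
    loss.backward()
    optimizer.step()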