I'm using a word-level CNN to build a binary text classifier. The model code looks like this:
import torch
import torch.nn as nn
import torch.nn.functional as F

class WordCNN(nn.Module):
    def __init__(self, n_classes, dictionary, args):
        super(WordCNN, self).__init__()
        mode = args.mode
        kernel_sizes = args.kernel_sizes  # kernel_sizes = [3, 4, 5]
        vocabulary_size = dictionary.vocabulary_size
        vector_size = dictionary.vector_size
        embedding_weight = dictionary.embedding
        if embedding_weight is not None:
            embedding_weight = torch.FloatTensor(embedding_weight)

        self.mode = mode
        if self.mode == 'rand':
            self.embedding = nn.Embedding(vocabulary_size, vector_size)
            embed_size = vector_size
        elif self.mode == 'static':
            self.embedding = nn.Embedding(vocabulary_size, vector_size)
            self.embedding.weight = nn.Parameter(embedding_weight, requires_grad=False)
            embed_size = vector_size
        elif self.mode == 'non-static':
            self.embedding = nn.Embedding(vocabulary_size, vector_size)
            self.embedding.weight = nn.Parameter(embedding_weight, requires_grad=True)
            embed_size = vector_size
        elif self.mode == 'multichannel':
            self.static_embedding = nn.Embedding(vocabulary_size, vector_size)
            self.static_embedding.weight = nn.Parameter(embedding_weight, requires_grad=False)
            self.non_static_embedding = nn.Embedding(vocabulary_size, vector_size)
            self.non_static_embedding.weight = nn.Parameter(embedding_weight, requires_grad=True)
            embed_size = vector_size * 2
        else:
            raise NotImplementedError

        convs = [nn.Conv1d(in_channels=embed_size, out_channels=300, kernel_size=kernel_size)
                 for kernel_size in kernel_sizes]
        self.conv_modules = nn.ModuleList(convs)
        self.tanh = nn.Tanh()
        self.dropout = nn.Dropout()
        # 300 feature maps per kernel size, 3 kernel sizes -> 900 features
        self.linear = nn.Linear(in_features=900, out_features=n_classes)

    def forward(self, sentences):
        # sentences: (batch_size, seq_len) word indices
        if self.mode != 'multichannel':
            embedded = self.embedding(sentences)
        else:
            static_embedded = self.static_embedding(sentences)
            non_static_embedded = self.non_static_embedding(sentences)
            # concatenate the two channels along the embedding dimension
            embedded = torch.cat([static_embedded, non_static_embedded], dim=2)
        # Conv1d expects (batch_size, embed_size, seq_len), assuming batch-first inputs
        inputs = embedded.permute(0, 2, 1)
        conv_outputs = [self.tanh(conv(inputs)) for conv in self.conv_modules]
        # global max pooling over time
        pooled = [F.max_pool1d(c, c.size(2)) for c in conv_outputs]
        concated = torch.cat(pooled, dim=1).squeeze(2)
        concated = self.dropout(concated)
        return F.log_softmax(self.linear(concated), dim=1)
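For reference, here is a minimal shape sanity check of the forward pass. The SimpleNamespace stand-ins for my real dictionary and args objects (vocabulary size 1000, random embeddings) are just assumptions to exercise the shapes:

from types import SimpleNamespace
import torch
from torch.autograd import Variable

# hypothetical stand-ins for the real preprocessing objects
dictionary = SimpleNamespace(vocabulary_size=1000, vector_size=300, embedding=None)
args = SimpleNamespace(mode='rand', kernel_sizes=[3, 4, 5])

model = WordCNN(n_classes=2, dictionary=dictionary, args=args)
# fake batch of word indices: (batch_size=64, max_seq_len=600)
batch = Variable(torch.LongTensor(64, 600).random_(0, 1000))
out = model(batch)
print(out.size())  # expected: torch.Size([64, 2]) of log-probabilities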
I construct the model and then train it like this:
logger.info('Constructing Model...')
model = args.model(dictionary=dictionary, n_classes=preprocessor.n_classes, args=args)
if args.use_gpu:
    model = model.cuda()

logger.info('Training....')
trainable_params = [p for p in model.parameters() if p.requires_grad]
if args.optimizer == 'Adam':
    optimizer = optim.Adam(params=trainable_params, lr=args.initial_lr)
if args.optimizer == 'SGD':
    optimizer = optim.SGD(params=trainable_params, lr=args.initial_lr, momentum=0.9)
lr_plateau = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.7, patience=2)
criterion = nn.CrossEntropyLoss  # the class itself; the Trainer instantiates it

trainer = Trainer(model, train_dataloader, dev_dataloader,
                  criterion=criterion, optimizer=optimizer,
                  lr_scheduler=lr_plateau,
                  use_gpu=args.use_gpu, logger=logger)
trainer.run(epochs=args.epochs)
The Trainer code is:
from datetime import datetime
from os.path import abspath, dirname, join

import torch
from torch.autograd import Variable

class Trainer():
    def __init__(self, model, train_dataloader, dev_dataloader, criterion, optimizer,
                 lr_scheduler, use_gpu=False, print_every=1, save_every=1, logger=None):
        self.model = model
        self.train_dataloader = train_dataloader
        self.dev_dataloader = dev_dataloader
        # criterion is a loss class; sum over each batch, normalize per epoch below
        self.criterion = criterion(size_average=False)
        self.optimizer = optimizer
        self.lr_scheduler = lr_scheduler
        self.print_every = print_every
        self.save_every = save_every
        self.epoch = 0
        self.epoch_losses = []
        self.epoch_metrics = []
        self.val_epoch_losses = []
        self.val_epoch_metrics = []
        self.use_gpu = use_gpu
        self.logger = logger
        self.base_message = ("Epoch: {epoch:<3d} "
                             "Progress: {progress:<.1%} ({elapsed}) "
                             "Train Loss: {train_loss:<.6} "
                             "Train Acc: {train_metric:<.1%} "
                             "Val Loss: {val_loss:<.6} "
                             "Val Acc: {val_metric:<.1%} "
                             "Learning rate: {learning_rate:<.4} ")
        self.start_time = datetime.now()

    def train(self):
        self.model.train()
        self.batch_losses = []
        self.batch_metrics = []
        for inputs, targets in self.train_dataloader:
            if self.use_gpu:
                inputs, targets = Variable(inputs.cuda()), Variable(targets.cuda())
            else:
                inputs, targets = Variable(inputs), Variable(targets)
            self.optimizer.zero_grad()
            outputs = self.model(inputs)
            batch_loss = self.criterion(outputs, targets)
            batch_metric = self.accuracy(outputs, targets)
            batch_loss.backward()
            # nn.utils.clip_grad_norm(self.model.parameters(), 5)
            self.optimizer.step()
            self.batch_losses.append(batch_loss.data)
            self.batch_metrics.append(batch_metric.data)
            if self.epoch == 0:
                # sanity check: epoch 0 trains on a single batch only
                break

        self.model.eval()
        self.val_batch_losses = []
        self.val_batch_metrics = []
        for val_inputs, val_targets in self.dev_dataloader:
            if self.use_gpu:
                val_inputs, val_targets = Variable(val_inputs.cuda()), Variable(val_targets.cuda())
            else:
                val_inputs, val_targets = Variable(val_inputs), Variable(val_targets)
            val_outputs = self.model(val_inputs)
            val_batch_loss = self.criterion(val_outputs, val_targets)
            val_batch_metric = self.accuracy(val_outputs, val_targets)
            self.val_batch_losses.append(val_batch_loss.data)
            self.val_batch_metrics.append(val_batch_metric.data)

        # average the summed batch losses/metrics over the full dataset
        train_data_size = len(self.train_dataloader.dataset)
        epoch_loss = torch.cat(self.batch_losses).sum() / train_data_size
        epoch_metric = torch.cat(self.batch_metrics).sum() / train_data_size
        val_data_size = len(self.dev_dataloader.dataset)
        val_epoch_loss = torch.cat(self.val_batch_losses).sum() / val_data_size
        val_epoch_metric = torch.cat(self.val_batch_metrics).sum() / val_data_size
        return epoch_loss, epoch_metric, val_epoch_loss, val_epoch_metric

    def run(self, epochs=10):
        for epoch in range(self.epoch, epochs + 1):
            self.epoch = epoch
            epoch_loss, epoch_metric, val_epoch_loss, val_epoch_metric = self.train()
            self.lr_scheduler.step(val_epoch_loss)
            self.epoch_losses.append(epoch_loss)
            self.epoch_metrics.append(epoch_metric)
            self.val_epoch_losses.append(val_epoch_loss)
            self.val_epoch_metrics.append(val_epoch_metric)
            if epoch % self.print_every == 0:
                current_lr = self.optimizer.param_groups[0]['lr']
                message = self.base_message.format(epoch=epoch,
                                                   progress=epoch / epochs,
                                                   train_loss=epoch_loss,
                                                   train_metric=epoch_metric,
                                                   val_loss=val_epoch_loss,
                                                   val_metric=val_epoch_metric,
                                                   learning_rate=current_lr,
                                                   elapsed=self.elapsed_time())
                self.logger.info(message)
            if epoch % self.save_every == 0:
                self.logger.info('Saving the model....')
                self.save_model()

    def accuracy(self, outputs, labels):
        # argmax over the class dimension -> number of correct predictions
        maximum, argmax = outputs.max(dim=1)
        corrects = argmax == labels
        n_corrects = corrects.float().sum()
        return n_corrects

    def elapsed_time(self):
        now = datetime.now()
        elapsed = now - self.start_time
        return str(elapsed)

    def save_model(self):
        base_dir = dirname(abspath(__file__))
        checkpoint_dir = join(base_dir, 'chk')
        model_name = self.model.__class__.__name__
        base_filename = '{model_name}-{start_time}-{epoch}.pth'
        checkpoint_filename = base_filename.format(model_name=model_name,
                                                   start_time=self.start_time,
                                                   epoch=self.epoch)
        checkpoint_filepath = join(checkpoint_dir, checkpoint_filename)
        torch.save(self.model.state_dict(), checkpoint_filepath)
        self.last_checkpoint_filepath = checkpoint_filepath
        # remember the checkpoint with the best validation accuracy so far
        if max(self.val_epoch_metrics) == self.val_epoch_metrics[-1]:
            self.best_checkpoint_file_path = checkpoint_filepath
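One operational note: save_model writes into a 'chk' directory next to the script and torch.save fails if it does not exist, so I create it beforehand. A minimal guard (the placement before trainer.run is my assumption):

import os
from os.path import abspath, dirname, join

checkpoint_dir = join(dirname(abspath(__file__)), 'chk')
if not os.path.isdir(checkpoint_dir):
    os.makedirs(checkpoint_dir)  # create the folder before Trainer.run saves into it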
The logs show that the validation accuracy is identical in every epoch: 63.3%, which is exactly the label distribution of the validation set. The same happens on the training data.
I have tried many model variants and they all give the same result.
PS: the hyperparameters are:
initial_lr is 0.00001
batch_size is 64
hidden_size is 300
kernel_sizes is [3, 4, 5]
the optimizer is Adam
max_seq_len is 600
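To confirm that the model is collapsing onto the majority class (rather than my accuracy computation being wrong), I tally the predicted labels on the dev set with a quick sketch like this; model, dev_dataloader, and args are the objects from above, and the Counter-based check is just diagnostic code, not part of the trainer:

from collections import Counter
from torch.autograd import Variable

model.eval()
prediction_counts = Counter()
for inputs, _ in dev_dataloader:
    inputs = Variable(inputs.cuda()) if args.use_gpu else Variable(inputs)
    outputs = model(inputs)
    _, argmax = outputs.max(dim=1)
    # tally how often each class index is predicted
    prediction_counts.update(int(c) for c in argmax.data)
print(prediction_counts)  # if one class accounts for ~100%, the model always predicts the majority class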