PyTorch word CNN classifier on imbalanced binary data always predicts class 1

Asked: 2018-06-08 06:52:53

Tags: machine-learning nlp deep-learning pytorch

I am building a binary classifier with a word-level CNN. The model code looks like this:

import torch
import torch.nn as nn
import torch.nn.functional as F

class WordCNN(nn.Module):

    def __init__(self, n_classes, dictionary, args):
        super(WordCNN, self).__init__()

        mode = args.mode
        kernel_sizes = args.kernel_sizes # kernel_sizes=[3,4,5]

        vocabulary_size = dictionary.vocabulary_size
        vector_size = dictionary.vector_size
        embedding_weight = dictionary.embedding
        if embedding_weight is not None:
            embedding_weight = torch.FloatTensor(embedding_weight)
        self.mode = mode
        if self.mode == 'rand':
            self.embedding = nn.Embedding(vocabulary_size, vector_size)
            embed_size = vector_size
        elif self.mode == 'static':
            self.embedding = nn.Embedding(vocabulary_size, vector_size)
            self.embedding.weight = nn.Parameter(embedding_weight, requires_grad=False)
            embed_size = vector_size
        elif self.mode == 'non-static':
            self.embedding = nn.Embedding(vocabulary_size, vector_size)
            self.embedding.weight = nn.Parameter(embedding_weight, requires_grad=True)
            embed_size = vector_size
        elif self.mode == 'multichannel':
            self.static_embedding = nn.Embedding(vocabulary_size, vector_size)
            self.static_embedding.weight = nn.Parameter(embedding_weight, requires_grad=False)
            self.non_static_embedding = nn.Embedding(vocabulary_size, vector_size)
            self.non_static_embedding.weight = nn.Parameter(embedding_weight, requires_grad=True)
            embed_size = vector_size * 2
        else:
            raise NotImplementedError

        convs = [nn.Conv1d(in_channels=embed_size, out_channels=300, kernel_size=kernel_size) 
                       for kernel_size in kernel_sizes]
        self.conv_modules = nn.ModuleList(convs)
        self.tanh = nn.Tanh()
        self.dropout = nn.Dropout()
        self.linear = nn.Linear(in_features=900, out_features=n_classes)  # 3 kernel sizes * 300 feature maps = 900


    def forward(self, sentences):

        if self.mode != 'multichannel':
            embedded = self.embedding(sentences)
        else:
            static_embedded = self.static_embedding(sentences)
            non_static_embedded = self.non_static_embedding(sentences)
            embedded = torch.cat([static_embedded, non_static_embedded], dim=2)

        # Conv1d expects (batch, embed_size, seq_len)
        inputs = embedded.permute(0, 2, 1)
        inputs = [conv(inputs) for conv in self.conv_modules]
        inputs = [F.max_pool1d(i, i.size(2)) for i in inputs]  # max over time
        concated = torch.cat(inputs, 1).squeeze(2)
        concated = self.dropout(concated)
        return F.log_softmax(self.linear(concated), dim=1)
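For reference, this is how I convince myself of the shapes going through one convolution branch (a standalone sketch with illustrative sizes only: batch 64, 300-dimensional embeddings, max_seq_len 600; it is not my actual training code):

import torch
import torch.nn as nn
import torch.nn.functional as F

conv = nn.Conv1d(in_channels=300, out_channels=300, kernel_size=3)
x = torch.randn(64, 300, 600)    # (batch, embed_size, seq_len)
h = conv(x)                      # -> (64, 300, 598)
p = F.max_pool1d(h, h.size(2))   # max over time -> (64, 300, 1)
print(p.squeeze(2).size())       # (64, 300); three kernel sizes concatenated -> 900 features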

Then I construct the model and train it:

import torch.optim as optim

logger.info('Constructing Model...')
model = args.model(dictionary=dictionary, n_classes=preprocessor.n_classes, args=args)

if args.use_gpu:
    model = model.cuda()

logger.info('Training....')
trainable_params = [p for p in model.parameters() if p.requires_grad]
if args.optimizer == 'Adam':
    optimizer = optim.Adam(params=trainable_params, lr=args.initial_lr)
elif args.optimizer == 'SGD':
    optimizer = optim.SGD(params=trainable_params, lr=args.initial_lr, momentum=0.9)
lr_plateau = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.7, patience=2)
criterion = nn.CrossEntropyLoss  # the class itself is passed; Trainer instantiates it
trainer = Trainer(model, train_dataloader, dev_dataloader,
                  criterion=criterion, optimizer=optimizer,
                  lr_scheduler=lr_plateau,
                  use_gpu=args.use_gpu, logger=logger)
trainer.run(epochs=args.epochs)
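Since the labels are imbalanced (roughly 63.3% vs. 36.7%, see below), I am also considering a class-weighted criterion instead of the plain one. A hypothetical drop-in for the `criterion = ...` line above; the weights are simple inverse label frequencies, and I am assuming class 1 is the majority class:

from functools import partial

# hypothetical: up-weight the minority class by its inverse frequency
# (assumes class 1 is the majority class at ~63.3%)
class_weights = torch.FloatTensor([1.0 / 0.367, 1.0 / 0.633])
criterion = partial(nn.CrossEntropyLoss, weight=class_weights)
# Trainer still calls criterion(size_average=False) as before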

The Trainer code is:

from datetime import datetime
from os.path import dirname, abspath, join

import torch
from torch.autograd import Variable

class Trainer():
    def __init__(self, model, train_dataloader, dev_dataloader, criterion, optimizer,
                 lr_scheduler, use_gpu=False, print_every=1, save_every=1, logger=None):
        self.model = model
        self.train_dataloader = train_dataloader
        self.dev_dataloader = dev_dataloader
        self.criterion = criterion(size_average=False)  # sum over the batch instead of averaging
        self.optimizer = optimizer
        self.lr_scheduler = lr_scheduler

        self.print_every = print_every
        self.save_every = save_every

        self.epoch = 0
        self.epoch_losses = []
        self.epoch_metrics = []
        self.val_epoch_losses=[]
        self.val_epoch_metrics = []
        self.use_gpu = use_gpu
        self.logger = logger

        self.base_message = ("Epoch: {epoch:<3d} "
                         "Progress: {progress:<.1%} ({elapsed}) "
                         "Train Loss: {train_loss:<.6} "
                         "Train Acc: {train_metric:<.1%} "
                         "Val Loss: {val_loss:<.6} "
                         "Val Acc: {val_metric:<.1%} "
                         "Learning rate: {learning_rate:<.4} "
                        )
        self.start_time = datetime.now()

    def train(self):
        self.model.train()
        self.batch_losses = []
        self.batch_metrics = []
        for inputs, targets in self.train_dataloader:
            if self.use_gpu:
                inputs, targets = Variable(inputs.cuda()), Variable(targets.cuda())
            else:
                inputs, targets = Variable(inputs), Variable(targets)

            self.optimizer.zero_grad()
            outputs = self.model(inputs)
            batch_loss = self.criterion(outputs, targets)
            batch_metric = self.accuracy(outputs, targets)

            batch_loss.backward()
            # nn.utils.clip_grad_norm(self.model.parameters(), 5)
            self.optimizer.step()
            self.batch_losses.append(batch_loss.data)
            self.batch_metrics.append(batch_metric.data)
            if self.epoch == 0:
                break  # only a single batch on the very first epoch

        self.model.eval()
        self.val_batch_losses = []
        self.val_batch_metrics = []
        for val_inputs, val_targets in self.dev_dataloader:
            if self.use_gpu:
                val_inputs, val_targets = Variable(val_inputs.cuda()), Variable(val_targets.cuda())
            else:
                val_inputs, val_targets = Variable(val_inputs), Variable(val_targets)

            val_outputs = self.model(val_inputs)
            val_batch_loss = self.criterion(val_outputs, val_targets)
            val_batch_metric = self.accuracy(val_outputs, val_targets)
            self.val_batch_losses.append(val_batch_loss.data)
            self.val_batch_metrics.append(val_batch_metric.data)

        train_data_size = len(self.train_dataloader.dataset)
        epoch_loss = torch.cat(self.batch_losses).sum() / train_data_size
        epoch_metric = torch.cat(self.batch_metrics).sum() / train_data_size

        val_data_size = len(self.dev_dataloader.dataset)
        val_epoch_loss = torch.cat(self.val_batch_losses).sum() / val_data_size
        val_epoch_metric = torch.cat(self.val_batch_metrics).sum() / val_data_size
        return epoch_loss, epoch_metric, val_epoch_loss, val_epoch_metric

    def run(self, epochs=10):
        for epoch in range(self.epoch, epochs + 1):
            self.epoch = epoch
            epoch_loss, epoch_metric, val_epoch_loss, val_epoch_metric = self.train()

            self.lr_scheduler.step(val_epoch_loss)

            self.epoch_losses.append(epoch_loss)
            self.epoch_metrics.append(epoch_metric)
            self.val_epoch_losses.append(val_epoch_loss)
            self.val_epoch_metrics.append(val_epoch_metric)

            if epoch % self.print_every == 0:
                current_lr = self.optimizer.param_groups[0]['lr']
                message = self.base_message.format(epoch=epoch,
                                                   progress=epoch / epochs,
                                                   train_loss=epoch_loss,
                                                   train_metric=epoch_metric,
                                                   val_loss=val_epoch_loss,
                                                   val_metric=val_epoch_metric,
                                                   learning_rate=current_lr,
                                                   elapsed=self.elapsed_time())
                self.logger.info(message)
            if epoch % self.save_every == 0:
                self.logger.info('Saving the model....')
                self.save_model()

    def accuracy(self, outputs, labels):
        maximum, argmax = outputs.max(dim=1)
        corrects = argmax == labels
        n_corrects = corrects.float().sum()
        return n_corrects

    def elapsed_time(self):
        now = datetime.now()
        elapsed = now - self.start_time
        return str(elapsed)

    def save_model(self):
        base_dir = dirname(abspath(__file__))
        checkpoint_dir = join(base_dir, 'chk')
        model_name = self.model.__class__.__name__
        base_filename = '{model_name}-{start_time}-{epoch}.pth'
        checkpoint_filename = base_filename.format(model_name=model_name,
                                                   start_time=self.start_time,
                                                   epoch=self.epoch)
        checkpoint_filepath = join(checkpoint_dir, checkpoint_filename)
        torch.save(self.model.state_dict(), checkpoint_filepath)
        self.last_checkpoint_filepath = checkpoint_filepath
        if max(self.val_epoch_metrics) == self.val_epoch_metrics[-1]:
            self.best_checkpoint_filepath = checkpoint_filepath

The logs show that the accuracy on the validation set is identical every epoch, 63.3%, which is exactly the label distribution of the validation data. The same happens on the training data. I have tried many models and they all give me the same result.

PS: initial_lr is 0.00001,
    batch_size is 64,
    hidden_size is 300,
    kernel_sizes is [3, 4, 5],
    the optimizer is Adam,
    max_seq_len is 600.
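To confirm that the model really collapses onto a single class (rather than my accuracy function being wrong), I count the predicted labels on the dev set with a quick loop like this (a sketch using the same old Variable-style API as above, ignoring the GPU branch for brevity):

from collections import Counter

model.eval()
counts = Counter()
for inputs, _ in dev_dataloader:
    outputs = model(Variable(inputs))
    _, predicted = outputs.max(dim=1)
    counts.update(predicted.data.tolist())
print(counts)  # only class 1 ever appears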

0 Answers:

There are no answers yet.