Transformers Trainer sequence classification problem

Time: 2020-05-24 15:06:41

Tags: machine-learning nlp pytorch huggingface-transformers

I want to use XLMRobertaForSequenceClassification to classify sequences as 1 or 0.

from transformers import XLMRobertaForSequenceClassification

MODEL_NAME = 'xlm-roberta-base'

def multilingual_model(max_seq_length=SEQUENCE_LENGTH, trainable=False):
    """Build and return a multilingual BERT model and tokenizer."""
    model = XLMRobertaForSequenceClassification.from_pretrained(
        MODEL_NAME,
        num_labels = 2, 
        output_attentions = False, 
        output_hidden_states = False,
    )
    return model
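
For completeness, a minimal sketch of how this builder might be used together with the matching tokenizer; the tokenizer is loaded separately here, since the function above only returns the model (how the notebook actually obtains it is an assumption):

from transformers import XLMRobertaTokenizer

# Tokenizer matching 'xlm-roberta-base'; loaded separately because
# multilingual_model() only returns the model.
tokenizer = XLMRobertaTokenizer.from_pretrained(MODEL_NAME)
model = multilingual_model()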

The Trainer is:

from transformers import Trainer

model = multilingual_model()

trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=part_train_dataset,
        eval_dataset=part_valid_dataset,
        compute_metrics=compute_metrics)

training_args

import torch
from transformers import TrainingArguments

BATCH_SIZE = 32
DEVICE = torch.device("cpu")
training_args = TrainingArguments("/kaggle/working")
training_args.do_train = True
training_args.evaluate_during_training = True
training_args.adam_epsilon = 1e-8
training_args.learning_rate = 1e-5
training_args.per_gpu_train_batch_size = BATCH_SIZE
training_args.num_train_epochs=TRAIN_EPOCH
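
Since TrainingArguments is a dataclass, the same configuration can also be passed through the constructor; an equivalent hedged sketch, assuming the transformers 2.x-era argument names used above:

from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="/kaggle/working",
    do_train=True,
    evaluate_during_training=True,
    adam_epsilon=1e-8,
    learning_rate=1e-5,
    per_gpu_train_batch_size=BATCH_SIZE,
    num_train_epochs=TRAIN_EPOCH,
)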

compute_metrics

from sklearn import metrics
from transformers import EvalPrediction
from typing import Dict
import numpy as np

def compute_metrics(p: EvalPrediction) -> Dict:
    preds = np.argmax(p.predictions, axis=1)
    return metrics.roc_auc_score(preds, p.label_ids)
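
As an aside, Trainer expects compute_metrics to return a dict of named values, and sklearn's roc_auc_score takes the true labels as its first argument; a hedged sketch of a version that satisfies both (the metric name "roc_auc" is arbitrary):

from typing import Dict
import numpy as np
from sklearn import metrics
from transformers import EvalPrediction

def compute_metrics(p: EvalPrediction) -> Dict:
    preds = np.argmax(p.predictions, axis=1)
    # roc_auc_score expects (y_true, y_score); Trainer expects a dict of metrics
    return {"roc_auc": metrics.roc_auc_score(p.label_ids, preds)}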

An excerpt of part_train_dataset:

[InputFeatures(input_ids=[0, 99070, 1159, 11050, 8108, 398, 6244, 7, 10932, 98, 759, 4488, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], attention_mask=[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], token_type_ids=None, label=1),
 InputFeatures(input_ids=[0, 28192, 2367, 83, 442, 22120, 2367, 83, 442, 142, 97629, 21115, 111, 3060, 102172, 20397, 761, 7, 2750, 621, 4127, 99, 163684, 214, 15970, 6, 140545, 297, 7398, 1419, 2750, 2], attention_mask=[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], token_type_ids=None, label=1)

Similarly, an excerpt of part_valid_dataset:

[InputFeatures(input_ids=[0, 99070, 1159, 11050, 8108, 398, 6244, 7, 10932, 98, 759, 4488, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], attention_mask=[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], token_type_ids=None, label=1),
 InputFeatures(input_ids=[0, 28192, 2367, 83, 442, 22120, 2367, 83, 442, 142, 97629, 21115, 111, 3060, 102172, 20397, 761, 7, 2750, 621, 4127, 99, 163684, 214, 15970, 6, 140545, 297, 7398, 1419, 2750, 2], attention_mask=[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], token_type_ids=None, label=1),

When running trainer.train(), the following error appears:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-11-3435b262f1ae> in <module>
----> 1 trainer.train()

/opt/conda/lib/python3.7/site-packages/transformers/trainer.py in train(self, model_path)
    380                     continue
    381 
--> 382                 tr_loss += self._training_step(model, inputs, optimizer)
    383 
    384                 if (step + 1) % self.args.gradient_accumulation_steps == 0 or (

/opt/conda/lib/python3.7/site-packages/transformers/trainer.py in _training_step(self, model, inputs, optimizer)
    465             inputs[k] = v.to(self.args.device)
    466 
--> 467         outputs = model(**inputs)
    468         loss = outputs[0]  # model outputs are always tuple in transformers (see doc)
    469 

/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    548             result = self._slow_forward(*input, **kwargs)
    549         else:
--> 550             result = self.forward(*input, **kwargs)
    551         for hook in self._forward_hooks.values():
    552             hook_result = hook(self, input, result)

/opt/conda/lib/python3.7/site-packages/transformers/modeling_roberta.py in forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, labels)
    355             else:
    356                 loss_fct = CrossEntropyLoss()
--> 357                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
    358             outputs = (loss,) + outputs
    359 

/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    548             result = self._slow_forward(*input, **kwargs)
    549         else:
--> 550             result = self.forward(*input, **kwargs)
    551         for hook in self._forward_hooks.values():
    552             hook_result = hook(self, input, result)

/opt/conda/lib/python3.7/site-packages/torch/nn/modules/loss.py in forward(self, input, target)
    930     def forward(self, input, target):
    931         return F.cross_entropy(input, target, weight=self.weight,
--> 932                                ignore_index=self.ignore_index, reduction=self.reduction)
    933 
    934 

/opt/conda/lib/python3.7/site-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction)
   2315     if size_average is not None or reduce is not None:
   2316         reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2317     return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
   2318 
   2319 

/opt/conda/lib/python3.7/site-packages/torch/nn/functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)
   2113                          .format(input.size(0), target.size(0)))
   2114     if dim == 2:
-> 2115         ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
   2116     elif dim == 4:
   2117         ret = torch._C._nn.nll_loss2d(input, target, weight, _Reduction.get_enum(reduction), ignore_index)

RuntimeError: expected scalar type Long but found Float

The error does not occur when num_labels is 1. Judging from the transformers GitHub, two labels seem to be the standard setup for binary classification.
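
For reference, the loss raised in the traceback is nn.CrossEntropyLoss, which requires integer (Long) class indices as targets; a minimal sketch reproducing the same RuntimeError with made-up tensors (not the actual dataset):

import torch
import torch.nn as nn

loss_fct = nn.CrossEntropyLoss()
logits = torch.randn(4, 2)                        # (batch_size, num_labels=2)
float_labels = torch.tensor([1.0, 0.0, 1.0, 1.0])
# loss_fct(logits, float_labels)                  # RuntimeError: expected scalar type Long but found Float
loss = loss_fct(logits, float_labels.long())      # works once the targets are int64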

Apart from how to resolve the error, I would also like to ask why there are zeros in the attention_mask of part_train_dataset.
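
On the zeros themselves, a small illustrative sketch using the tokenizer API of that transformers version (the input string is made up): positions occupied by padding tokens get attention_mask == 0, while real tokens get 1.

from transformers import XLMRobertaTokenizer

tokenizer = XLMRobertaTokenizer.from_pretrained('xlm-roberta-base')
# Padding a short sentence up to max_length produces trailing zeros in the mask
enc = tokenizer.encode_plus("a short sentence", max_length=32, pad_to_max_length=True)
print(enc["attention_mask"])  # 1s for real tokens, 0s for padded positions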

0 Answers