I want to use XLMRobertaForSequenceClassification to classify sequences as 1 or 0.
from transformers import XLMRobertaForSequenceClassification

MODEL_NAME = 'xlm-roberta-base'

def multilingual_model(max_seq_length=SEQUENCE_LENGTH, trainable=False):
    """Build and return a multilingual XLM-RoBERTa sequence classification model."""
    model = XLMRobertaForSequenceClassification.from_pretrained(
        MODEL_NAME,
        num_labels=2,
        output_attentions=False,
        output_hidden_states=False,
    )
    return model
The trainer is:
from transformers import Trainer

model = multilingual_model()
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=part_train_dataset,
    eval_dataset=part_valid_dataset,
    compute_metrics=compute_metrics)
The training_args are:
import torch
from transformers import TrainingArguments

BATCH_SIZE = 32
DEVICE = torch.device("cpu")

training_args = TrainingArguments("/kaggle/working")
training_args.do_train = True
training_args.evaluate_during_training = True
training_args.adam_epsilon = 1e-8
training_args.learning_rate = 1e-5
training_args.per_gpu_train_batch_size = BATCH_SIZE
training_args.num_train_epochs = TRAIN_EPOCH
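For reference, the same configuration can also be passed directly to the TrainingArguments constructor instead of mutating the fields afterwards; the exact field names (e.g. evaluate_during_training, per_gpu_train_batch_size) vary between transformers releases, so treat this only as a sketch:

from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="/kaggle/working",
    do_train=True,
    evaluate_during_training=True,       # renamed in later transformers versions
    adam_epsilon=1e-8,
    learning_rate=1e-5,
    per_gpu_train_batch_size=BATCH_SIZE,
    num_train_epochs=TRAIN_EPOCH,
)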
The compute_metrics function is:
from typing import Dict

import numpy as np
from sklearn import metrics
from transformers import EvalPrediction

def compute_metrics(p: EvalPrediction) -> Dict:
    preds = np.argmax(p.predictions, axis=1)
    # roc_auc_score expects the true labels first, then the predictions
    return {"roc_auc": metrics.roc_auc_score(p.label_ids, preds)}
An excerpt of part_train_dataset:
[InputFeatures(input_ids=[0, 99070, 1159, 11050, 8108, 398, 6244, 7, 10932, 98, 759, 4488, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], attention_mask=[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], token_type_ids=None, label=1),
InputFeatures(input_ids=[0, 28192, 2367, 83, 442, 22120, 2367, 83, 442, 142, 97629, 21115, 111, 3060, 102172, 20397, 761, 7, 2750, 621, 4127, 99, 163684, 214, 15970, 6, 140545, 297, 7398, 1419, 2750, 2], attention_mask=[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], token_type_ids=None, label=1)
Similarly, an excerpt of part_valid_dataset:
[InputFeatures(input_ids=[0, 99070, 1159, 11050, 8108, 398, 6244, 7, 10932, 98, 759, 4488, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], attention_mask=[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], token_type_ids=None, label=1),
InputFeatures(input_ids=[0, 28192, 2367, 83, 442, 22120, 2367, 83, 442, 142, 97629, 21115, 111, 3060, 102172, 20397, 761, 7, 2750, 621, 4127, 99, 163684, 214, 15970, 6, 140545, 297, 7398, 1419, 2750, 2], attention_mask=[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], token_type_ids=None, label=1),
When I run trainer.train(), the following error occurs:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-11-3435b262f1ae> in <module>
----> 1 trainer.train()
/opt/conda/lib/python3.7/site-packages/transformers/trainer.py in train(self, model_path)
380 continue
381
--> 382 tr_loss += self._training_step(model, inputs, optimizer)
383
384 if (step + 1) % self.args.gradient_accumulation_steps == 0 or (
/opt/conda/lib/python3.7/site-packages/transformers/trainer.py in _training_step(self, model, inputs, optimizer)
465 inputs[k] = v.to(self.args.device)
466
--> 467 outputs = model(**inputs)
468 loss = outputs[0] # model outputs are always tuple in transformers (see doc)
469
/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
--> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)
/opt/conda/lib/python3.7/site-packages/transformers/modeling_roberta.py in forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, labels)
355 else:
356 loss_fct = CrossEntropyLoss()
--> 357 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
358 outputs = (loss,) + outputs
359
/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
--> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)
/opt/conda/lib/python3.7/site-packages/torch/nn/modules/loss.py in forward(self, input, target)
930 def forward(self, input, target):
931 return F.cross_entropy(input, target, weight=self.weight,
--> 932 ignore_index=self.ignore_index, reduction=self.reduction)
933
934
/opt/conda/lib/python3.7/site-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction)
2315 if size_average is not None or reduce is not None:
2316 reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2317 return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
2318
2319
/opt/conda/lib/python3.7/site-packages/torch/nn/functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)
2113 .format(input.size(0), target.size(0)))
2114 if dim == 2:
-> 2115 ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
2116 elif dim == 4:
2117 ret = torch._C._nn.nll_loss2d(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
RuntimeError: expected scalar type Long but found Float
The error does not occur if num_labels is 1. Judging from the transformers GitHub, num_labels=2 seems to be the standard setup for binary classification.
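For context, with num_labels=2 the model computes CrossEntropyLoss, which requires integer (Long) class ids, while num_labels=1 switches to MSELoss, which accepts floats; that would explain why the error disappears with a single label. The snippet below is only a sketch of the kind of dtype cast that satisfies CrossEntropyLoss; raw_labels is an assumed name, not a variable from my notebook.

import torch

# CrossEntropyLoss expects class ids as torch.long, not torch.float.
raw_labels = [1.0, 0.0, 1.0]                          # assumed float labels
labels = torch.tensor(raw_labels, dtype=torch.long)   # -> tensor([1, 0, 1])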
Besides how to resolve the error, I would also like to ask why there are zeros in the attention_mask of part_train.
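On that second point: the zeros seem to line up with the trailing 1s (the XLM-R pad token id) in input_ids, i.e. they mark padding positions the model should ignore. A small sketch of how such a mask is typically produced with the matching tokenizer (the text and max_length here are made up):

from transformers import XLMRobertaTokenizer

tokenizer = XLMRobertaTokenizer.from_pretrained('xlm-roberta-base')
enc = tokenizer.encode_plus(
    "a short example",            # made-up text
    max_length=12,
    pad_to_max_length=True,       # padding='max_length' in newer versions
)
print(enc["input_ids"])           # ends with repeated pad ids (1 for XLM-R)
print(enc["attention_mask"])      # 1 for real tokens, 0 for padding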