当我运行以下代码时
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
import torch
tokenizer = AutoTokenizer.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
model = AutoModelForQuestionAnswering.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
text = r"""
As checked Dis is not yet on boarded to ARB portal, hence we cannot upload the invoices in portal
"""
questions = [
"Dis asked if it is possible to post the two invoice in ARB.I have not access so I wanted to check if you would be able to do it.",
]
for question in questions:
inputs = tokenizer.encode_plus(question, text, add_special_tokens=True, return_tensors="pt")
input_ids = inputs["input_ids"].tolist()[0]
text_tokens = tokenizer.convert_ids_to_tokens(input_ids)
answer_start_scores, answer_end_scores = model(**inputs)
answer_start = torch.argmax(
answer_start_scores
) # Get the most likely beginning of answer with the argmax of the score
answer_end = torch.argmax(answer_end_scores) + 1 # Get the most likely end of answer with the argmax of the score
answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end]))
print(f"Question: {question}")
print(f"Answer: {answer}\n")
我在这里得到的答案是:
Question: Dis asked if it is possible to post the two invoice in ARB.I have not access so I wanted to check if you would be able to do it.
Answer: dis is not yet on boarded to ARB portal
如何获得此答案的分数?分数与运行Question-Answer管道时得到的分数非常相似。
我必须采用这种方法,因为使用问答管道时,以下代码会给我带来关键错误
from transformers import pipeline
nlp = pipeline("question-answering")
context = r"""
As checked Dis is not yet on boarded to ARB portal, hence we cannot upload the invoices in portal.
"""
print(nlp(question="Dis asked if it is possible to post the two invoice in ARB?", context=context))
答案 0 :(得分:0)
这是我获得分数的尝试。看来我无法弄清楚feature.p_mask
是什么。因此,我目前无法删除构成softmax的非上下文索引。
# ... assuming imports and question and context
model_name="deepset/roberta-base-squad2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
inputs = tokenizer(question, context,
add_special_tokens=True,
return_tensors='pt')
input_ids = inputs['input_ids'].tolist()[0]
outputs = model(**inputs)
# used to compute score
start = outputs.start_logits.detach().numpy()
end = outputs.end_logits.detach().numpy()
# from source code
# Ensure padded tokens & question tokens cannot belong to the set of candidate answers.
#?? undesired_tokens = np.abs(np.array(feature.p_mask) - 1) & feature.attention_mask
# Generate mask
undesired_tokens = inputs['attention_mask']
undesired_tokens_mask = undesired_tokens == 0.0
# Make sure non-context indexes in the tensor cannot contribute to the softmax
start_ = np.where(undesired_tokens_mask, -10000.0, start)
end_ = np.where(undesired_tokens_mask, -10000.0, end)
# Normalize logits and spans to retrieve the answer
start_ = np.exp(start_ - np.log(np.sum(np.exp(start_), axis=-1, keepdims=True)))
end_ = np.exp(end_ - np.log(np.sum(np.exp(end_), axis=-1, keepdims=True)))
# Compute the score of each tuple(start, end) to be the real answer
outer = np.matmul(np.expand_dims(start_, -1), np.expand_dims(end_, 1))
# Remove candidate with end < start and end - start > max_answer_len
max_answer_len = 15
candidates = np.tril(np.triu(outer), max_answer_len - 1)
scores_flat = candidates.flatten()
idx_sort = [np.argmax(scores_flat)]
start, end = np.unravel_index(idx_sort, candidates.shape)[1:]
end += 1
score = candidates[0, start, end-1]
start, end, score = start.item(), end.item(), score.item()
print(tokenizer.decode(input_ids[start:end]))
print(score)
查看更多source code