我想使用BabelNet API检索引理的同义词和上位词。我设法做到了,但是它仅适用于某些引理,对于其他引理,它会引发此错误:UnicodeEncodeError: 'charmap' codec can't encode character '\u2601' in position 646: character maps to <undefined>
。通过查看先前的相关问题,我发现必须打开utf-8编码的文件,但是我正在使用请求模块。查阅文档,我发现我可以强迫它使用utf-8。但是问题仍然存在。
这是产生问题的代码,如果您想重现它。
import requests
import json
KEY = 'KEY' # You can get 1000 babelcoins free
# Retrieve the information of a given synset
def retrieve_info_synset(id, x): # x == 'a': list of all lemmas, x == 'b': only the first lemma.
service_url = 'https://babelnet.io/v5/getSynset'
data = {
'id': id,
'key': KEY
}
response = requests.post(service_url, data=data)
response.encoding = 'utf-8'
if response.status_code == 200:
data = json.loads(response.text)
senses = data['senses']
if x == 'a':
retlist = [result['properties'].get('simpleLemma') for result in senses]
return retlist
elif x == 'b':
if senses:
print(senses[0]['properties'].get('simpleLemma'))
return(senses[0]['properties'].get('simpleLemma'))
else:
return 'Nope'
else:
print(response.status_code)
def check_domain(id, concepts):
service_url = 'https://babelnet.io/v5/getOutgoingEdges'
data = {
'id': id,
'key': KEY
}
response = requests.post(service_url, data=data)
if response.status_code == 200:
data = json.loads(response.text, 'utf-8')
targets = [result['target'] for result in data if result['target'] in concepts]
if targets:
return True
else:
return False
else:
print('not 200 response')
# Retrieve the IDs of the Babel synsets (concepts) denoted by a given word
def retrieve_synset_id(lemma):
service_url = 'https://babelnet.io/v5/getSynsetIds'
# target domains = {cloud, computer science, computing}
concepts = [' bn:00014688n', 'bn:01225375n', 'bn:00021494n']
lang = 'EN'
data = {
'lemma': lemma,
'searchLang': lang,
'key': KEY
}
response = requests.post(service_url, data=data)
response.encoding = 'utf-8'
if response.status_code == 200:
retlist = list()
data = json.loads(response.text, 'utf-8')
for result in data:
if check_domain(result['id'], concepts):
retlist.append(result['id'])
return retlist
else:
print(response.status_code)
# Retrieve hypernyms, hyponyms and antonyms of a given BabelNet synset
def retrieve_hypernyms(id):
service_url = 'https://babelnet.io/v5/getOutgoingEdges'
data = {
'id': id,
'key': KEY
}
response = requests.post(service_url, data=data)
response.encoding = 'utf-8'
if response.status_code == 200:
data = json.loads(response.text, 'utf-8')
retlist = [retrieve_info_synset(result['target'], 'b') for result in data if result['pointer']['relationGroup'] == "HYPERNYM"]
return retlist
else:
print(response.status_code)
def enrich_term(term):
synset_id_list = retrieve_synset_id(term)
synonyms = set()
hypernyms = set()
for id in synset_id_list:
synonyms.update(retrieve_info_synset(id, 'a'))
hypernyms.update(retrieve_hypernyms(id))
return synonyms, hypernyms
print(enrich_term('algorithm'))