babelnet API-使用请求和JSON模块进行编码错误

时间:2018-07-26 10:14:14

标签: python json python-requests

我想使用BabelNet API检索引理的同义词和上位词。我设法做到了,但是它仅适用于某些引理,对于其他引理,它会引发此错误:UnicodeEncodeError: 'charmap' codec can't encode character '\u2601' in position 646: character maps to <undefined>。通过查看先前的相关问题,我发现必须打开utf-8编码的文件,但是我正在使用请求模块。查阅文档,我发现我可以强迫它使用utf-8。但是问题仍然存在。

这是产生问题的代码,如果您想重现它。

import requests
import json

KEY = 'KEY'  # You can get 1000 babelcoins free


# Retrieve the information of a given synset
def retrieve_info_synset(id, x):  # x == 'a': list of all lemmas, x == 'b': only the first lemma.
    service_url = 'https://babelnet.io/v5/getSynset'

    data = {
        'id': id,
        'key': KEY
    }

    response = requests.post(service_url, data=data)
    response.encoding = 'utf-8'

    if response.status_code == 200:
        data = json.loads(response.text)
        senses = data['senses']
        if x == 'a':
            retlist = [result['properties'].get('simpleLemma') for result in senses]
            return retlist
        elif x == 'b':
            if senses:
                print(senses[0]['properties'].get('simpleLemma'))
                return(senses[0]['properties'].get('simpleLemma'))
            else:
                return 'Nope'

    else:
        print(response.status_code)


def check_domain(id, concepts):
    service_url = 'https://babelnet.io/v5/getOutgoingEdges'

    data = {
        'id': id,
        'key': KEY
    }

    response = requests.post(service_url, data=data)
    if response.status_code == 200:

        data = json.loads(response.text, 'utf-8')
        targets = [result['target'] for result in data if result['target'] in concepts]
        if targets:
            return True
        else:
            return False
    else:
        print('not 200 response')


# Retrieve the IDs of the Babel synsets (concepts) denoted by a given word
def retrieve_synset_id(lemma):
    service_url = 'https://babelnet.io/v5/getSynsetIds'

    # target domains = {cloud, computer science, computing}
    concepts = [' bn:00014688n', 'bn:01225375n', 'bn:00021494n']

    lang = 'EN'

    data = {
        'lemma': lemma,
        'searchLang': lang,
        'key': KEY
    }

    response = requests.post(service_url, data=data)
    response.encoding = 'utf-8'

    if response.status_code == 200:
        retlist = list()
        data = json.loads(response.text, 'utf-8')
        for result in data:
            if check_domain(result['id'], concepts):
                retlist.append(result['id'])
        return retlist
    else:
        print(response.status_code)


# Retrieve hypernyms, hyponyms and antonyms of a given BabelNet synset
def retrieve_hypernyms(id):
    service_url = 'https://babelnet.io/v5/getOutgoingEdges'

    data = {
        'id': id,
        'key': KEY
    }

    response = requests.post(service_url, data=data)
    response.encoding = 'utf-8'

    if response.status_code == 200:
        data = json.loads(response.text, 'utf-8')
        retlist = [retrieve_info_synset(result['target'], 'b') for result in data if result['pointer']['relationGroup'] == "HYPERNYM"]
        return retlist
    else:
        print(response.status_code)


def enrich_term(term):

    synset_id_list = retrieve_synset_id(term)
    synonyms = set()
    hypernyms = set()
    for id in synset_id_list:
        synonyms.update(retrieve_info_synset(id, 'a'))
        hypernyms.update(retrieve_hypernyms(id))

    return synonyms, hypernyms


print(enrich_term('algorithm'))

0 个答案:

没有答案