How to store NER results in JSON / a database

Time: 2016-02-03 09:27:43

Tags: python nltk stanford-nlp named-entity-recognition named-entity-extraction

    import nltk
    from itertools import groupby


    def get_continuous_chunks(tagged_sent):
        continuous_chunk = []
        current_chunk = []

        for token, tag in tagged_sent:
            if tag != "O":
                current_chunk.append((token, tag))
            else:
                if current_chunk: # if the current chunk is not empty
                    continuous_chunk.append(current_chunk)
                    current_chunk = []
        # Flush the final current_chunk into the continuous_chunk, if any.
        if current_chunk:
            continuous_chunk.append(current_chunk)
        return continuous_chunk

    ne_tagged_sent = [('Rami', 'PERSON'), ('Eid', 'PERSON'), ('is', 'O'), ('studying', 'O'), ('at', 'O'), ('Stony', 'ORGANIZATION'), ('Brook', 'ORGANIZATION'), ('University', 'ORGANIZATION'), ('in', 'O'), ('NY', 'LOCATION')]

    named_entities = get_continuous_chunks(ne_tagged_sent)
    named_entities_str = [" ".join([token for token, tag in ne]) for ne in named_entities]
    named_entities_str_tag = [(" ".join([token for token, tag in ne]), ne[0][1]) for ne in named_entities]

    def parser(n, string):
        # Return whichever element of the n-th (entity, tag) pair does not
        # match the supplied string.
        for i in named_entities_str_tag[n]:
            if i == string:
                pass
            else:
                return i


    print named_entities_str_tag
    print

I get the following output from the code above:

    ('PERSON', 'Rami Eid')
    ('ORGANIZATION', 'Stony Brook University')
    ('LOCATION', 'NY')
    ('PERSON', 'GuruRaj Bagali')
    ('ORGANIZATION', 'Christ University')

But I want each PERSON to be mapped to its ORGANIZATION and LOCATION, and I want to store that in JSON format.
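Roughly, the target JSON might look like the sketch below (the grouping of ORGANIZATION and LOCATION under each PERSON is assumed here, since the NER output only gives a flat sequence of entities; the file name is illustrative):

    import json

    # Assumed target shape: one entry per PERSON, holding the ORGANIZATION
    # and LOCATION that belong to that person (this grouping is an
    # assumption, not something the NER output itself provides).
    desired = {
        'Rami Eid': {'ORGANIZATION': 'Stony Brook University', 'LOCATION': 'NY'},
        'GuruRaj Bagali': {'ORGANIZATION': 'Christ University'}
    }

    # Write it out; 'entities.json' is a placeholder name.
    with open('entities.json', 'w') as f:
        json.dump(desired, f, indent=2)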

2 Answers:

Answer 0 (score: 0)

It is not very clear what the ne_tagged_sent list contains (does every PERSON or ORGANIZATION have a LOCATION?). You will have to clarify that before we can answer your question.

Answer 1 (score: 0)

You should format your data as a dictionary, with one entry per person:

    import json

    data = {
        'Rami Eid': {'job': 'engineer', 'location': 'NY'},
        'GuruRaj Bagali': {'job': 'professor', 'location': 'NY'}
    }

    # Save it in a json file
    json.dump(data, open('path/to_your_file', 'w'))
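If the dictionary should be built from the NER output rather than written by hand, one possible sketch follows. It assumes each PERSON entity is followed by the ORGANIZATION/LOCATION entities that relate to it, which the tagger itself does not guarantee; the 'job' field from the example above cannot be recovered from the NER tags, so it is omitted, and the file name is illustrative.

    import json

    # (tag, entity) pairs in the shape produced by the question's code,
    # inlined here so the sketch is self-contained.
    ner_output = [
        ('PERSON', 'Rami Eid'),
        ('ORGANIZATION', 'Stony Brook University'),
        ('LOCATION', 'NY'),
        ('PERSON', 'GuruRaj Bagali'),
        ('ORGANIZATION', 'Christ University')
    ]

    data = {}
    current_person = None
    for tag, entity in ner_output:
        if tag == 'PERSON':
            # Start a new record whenever a PERSON entity appears.
            current_person = entity
            data[current_person] = {}
        elif current_person is not None:
            # Attach ORGANIZATION/LOCATION to the most recent PERSON
            # (a heuristic, not a guarantee from the tagger).
            data[current_person][tag.lower()] = entity

    # Write the result to disk; 'ner_result.json' is a placeholder name.
    with open('ner_result.json', 'w') as f:
        json.dump(data, f, indent=2)

Reading it back later is simply data = json.load(open('ner_result.json')).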