import nltk
from itertools import groupby

def get_continuous_chunks(tagged_sent):
    # Collect consecutive tokens whose NE tag is not "O" into chunks.
    continuous_chunk = []
    current_chunk = []
    for token, tag in tagged_sent:
        if tag != "O":
            current_chunk.append((token, tag))
        else:
            if current_chunk:  # if the current chunk is not empty
                continuous_chunk.append(current_chunk)
                current_chunk = []
    # Flush the final current_chunk into the continuous_chunk, if any.
    if current_chunk:
        continuous_chunk.append(current_chunk)
    return continuous_chunk

ne_tagged_sent = [('Rami', 'PERSON'), ('Eid', 'PERSON'), ('is', 'O'), ('studying', 'O'), ('at', 'O'), ('Stony', 'ORGANIZATION'), ('Brook', 'ORGANIZATION'), ('University', 'ORGANIZATION'), ('in', 'O'), ('NY', 'LOCATION')]

named_entities = get_continuous_chunks(ne_tagged_sent)
# Join each chunk's tokens into a single entity string, with and without its tag.
named_entities_str = [" ".join([token for token, tag in ne]) for ne in named_entities]
named_entities_str_tag = [(" ".join([token for token, tag in ne]), ne[0][1]) for ne in named_entities]

def parser(n, string):
    # Return the first element of the n-th (entity, tag) pair that differs from `string`.
    for i in named_entities_str_tag[n]:
        if i == string:
            pass
        else:
            return i

print named_entities_str_tag
print
I got this output from the code above:
('PERSON', 'Rami Eid')
('ORGANIZATION', 'Stony Brook University')
('LOCATION', 'NY')
('PERSON', 'GuruRaj Bagali')
('ORGANIZATION', 'Christ University')
But I want it to map each PERSON with its ORGANIZATION and LOCATION, and I want to store the result in JSON format.
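For reference, here is a minimal sketch (an editorial addition, not from the original post) of one way to get from the (entity, tag) pairs above to JSON, assuming that grouping the entity strings under their tags is the mapping intended here:

import json
from collections import defaultdict

def entities_to_json(pairs):
    # Hypothetical helper: group entity strings under their NE tag.
    grouped = defaultdict(list)
    for entity, tag in pairs:  # pairs come from named_entities_str_tag above
        grouped[tag].append(entity)
    return json.dumps(grouped, indent=2)

# Expected shape: {"PERSON": ["Rami Eid"], "ORGANIZATION": ["Stony Brook University"], "LOCATION": ["NY"]}
print(entities_to_json(named_entities_str_tag))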
Answer 0 (score: 0)
It is not very clear what the ne_tagged_sent list contains (does every PERSON and ORGANIZATION have a location?). You will have to clarify that before we can answer your question.
Answer 1 (score: 0)
You should format your data as a dictionary, with one entry per person:
import json

data = {
    'Rami Eid': {'job': 'engineer', 'location': 'NY'},
    'GuruRaj Bagali': {'job': 'professor', 'location': 'NY'}
}
# Save it in a JSON file
json.dump(data, open('path/to_your_file', 'w'))
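As a small follow-up (an editorial sketch, not part of the original answer), the same dictionary can be written through a context manager so the file handle is closed, and then read back to verify the round trip; the file name reuses the placeholder path from the answer:

import json

data = {
    'Rami Eid': {'job': 'engineer', 'location': 'NY'},
    'GuruRaj Bagali': {'job': 'professor', 'location': 'NY'}
}

# Write the dictionary and let the with-block close the file.
with open('path/to_your_file', 'w') as f:
    json.dump(data, f, indent=2)

# Read it back to verify the round trip.
with open('path/to_your_file') as f:
    loaded = json.load(f)
print(loaded['Rami Eid']['location'])  # NY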