for word in list6:
if word = "TRUMP":
所以,我在辩论记录中列出了每个单词。当特朗普说话时,它以“TRUMP”开头。我需要接受他的话并把它们分成一个单独的列表。如果list6中的单词是“TRUMP”,那么我需要将所有内容放入列表中,直到它显示另一个人的姓名。他不止一次说话。
我只需要帮助完成这个循环。
答案 0 :(得分:2)
list6 = ['TRUMP','I','am','good', 'HILLARY','I','am','good','too','TRUMP','But','How?']
person_words = {'TRUMP':[], 'HILLARY':[]}
person_names = person_words.keys()
one_person_onetime_words = []
for word in list6:
if word in person_names:
if len(one_person_onetime_words):
person_words[this_person].append(one_person_onetime_words)
one_person_onetime_words = []
this_person = word
else:
one_person_onetime_words.append(word)
person_words[this_person].append(one_person_onetime_words)
print person_words
给出
{'HILLARY': [['I', 'am', 'good', 'too']], 'TRUMP': [['I', 'am', 'good'], ['But', 'How?']]}
所以,这只是一次性提供所有人的所有不同的谈话。
正如您在对自己问题的评论中所提到的,如果您只想获得一个人的话,可以使用以下内容:
from copy import copy
list6 = ['TRUMP','I','am','good', 'HILLARY','I','am','good','too','TRUMP','But','How?']
person_words = []
all_persons = ['TRUMP', 'HILLARY']
person_looking_for = 'TRUMP'
filter_out_persons = copy(all_persons)
filter_out_persons.remove(person_looking_for)
person_onetime_words = []
capture_words = False
for word in list6:
if word == person_looking_for:
capture_words = True
if len(person_onetime_words):
person_words.append(person_onetime_words)
person_onetime_words = []
elif word not in filter_out_persons and capture_words:
person_onetime_words.append(word)
else:
capture_words = False
person_words.append(person_onetime_words)
print "{}'s words".format(person_looking_for)
print person_words
这给了
TRUMP's words
[['I', 'am', 'good'], ['But', 'How?']]
并且,以下将给出一个单词作为键的字典,该值将再次成为一个字典,每个人的频率为该单词。
import pprint
list6 = ['TRUMP','I','am','good', 'HILLARY','I','am','good','too','TRUMP','But','How?']
person_names = ['TRUMP','HILLARY']
word_frequency = {}
for word in list6:
if word in person_names:
person = word
else:
word = word.lower()
if word in word_frequency:
if person in word_frequency[word]:
word_frequency[word][person] += 1
else:
word_frequency[word][person] = 1
else:
word_frequency[word] = {person: 1}
pprint.pprint(word_frequency)
给出
{'am': {'HILLARY': 1, 'TRUMP': 1},
'but': {'TRUMP': 1},
'good': {'HILLARY': 1, 'TRUMP': 1},
'how?': {'TRUMP': 1},
'i': {'HILLARY': 1, 'TRUMP': 1},
'too': {'HILLARY': 1}}