我有一些 Twitter JSON 文件，我想在其中搜索关键字，并把包含这些关键字的推文保存为 CSV。为了删除空行，我尝试过类似帖子中的建议（例如：How to delete all blank lines in the file with the help of python?）。但是，空行仍然出现在我的 CSV 文件中。有谁知道问题出在哪里？
filenames = ["8may.json", "9may.json"]
# Keywords to search for; they must be all lowercase because the tweet text
# is lowercased before matching.
## change this to the keywords you want to use
keywords = ["test", "testing", "tester"]
# Compiled once, outside the loops: matches any of the keywords as a whole word.
keyword_pattern = re.compile(r'\b(%s)\b' % '|'.join(keywords))

# Go through every input file line by line, parse each non-blank line as JSON
# and keep the tweet only if its "text" field contains one of the keywords.
for filename in filenames:
    # `with` closes each input file as soon as it has been read.
    with open(filename) as tweet_file:
        for line in tweet_file:
            if not line.strip():
                continue  # skip blank lines
            try:
                tweet = json.loads(line)
            except ValueError:
                # The line is not valid JSON; skip it instead of aborting.
                continue
            if not isinstance(tweet, dict):
                continue
            # Match against the tweet text only, not the raw JSON line, so
            # that screen names, URLs or other metadata cannot cause a hit.
            text = tweet.get("text")
            if text and keyword_pattern.search(text.lower()):
                tweets.append(tweet)
# Spread every matched tweet over the parallel per-column lists; index i of
# each list belongs to the same tweet.
for tweet in tweets:
    user = tweet["user"]
    place = tweet["place"]
    ids.append(tweet["id_str"])
    texts.append(tweet["text"])
    time_created.append(tweet["created_at"])
    retweet_counts.append(tweet["retweet_count"])
    in_reply_to_screen_name.append(tweet["in_reply_to_screen_name"])
    geos.append(tweet["geo"])
    coordinates.append(tweet["coordinates"])
    places.append(place)
    # When there is no usable place data, store the string "None" instead:
    # TypeError covers place being None, KeyError a place without "country".
    try:
        places_country.append(place["country"])
    except (TypeError, KeyError):
        places_country.append("None")
    lang.append(tweet["lang"])
    user_screen_names.append(user["screen_name"])
    user_followers_count.append(user["followers_count"])
    user_friends_count.append(user["friends_count"])
    user_statuses_count.append(user["statuses_count"])
    # Was user["statuses_count"], which duplicated the previous column.
    user_locations.append(user["location"])
# Column titles, in the same order as the lists zipped into `rows` below.
header = [
    "ids", "text", "time_created", "retweet_counts", "in_reply_to", "geos",
    "coordinates", "places", "country", "language", "screen_name",
    "followers", "friends", "statuses", "locations",
]
rows = zip(ids, texts, time_created, retweet_counts, in_reply_to_screen_name,
           geos, coordinates, places, places_country, lang, user_screen_names,
           user_followers_count, user_friends_count, user_statuses_count,
           user_locations)
# NOTE(review): `out` is opened outside this block. Under Python 2 on Windows
# it presumably has to be opened in binary mode ('wb'); in text mode each
# "\r\n" row ending is written as "\r\r\n", which shows up as a blank row
# after every record. Confirm where `out` is opened.
csv_writer = writer(out)
try:
    # Write the header through the csv writer too, so that it gets the same
    # line terminator and quoting as the data rows.
    csv_writer.writerow(header)
    for row in rows:
        # Text values are encoded to UTF-8; values without an `encode`
        # method (numbers, None, nested structures) are written unchanged.
        values = [(value.encode('utf8') if hasattr(value, 'encode') else value)
                  for value in row]
        csv_writer.writerow(values)
finally:
    # Close the output file even if writing a row fails.
    out.close()