将 JSON 输出写入 CSV 时删除空行

时间:2014-05-14 07:40:57

标签: python json csv strip

我有一些 Twitter 的 JSON 文件,想在其中搜索关键字,并把包含这些关键字的推文保存为 CSV。为了删除空行,我尝试过类似帖子中的建议(例如:How to delete all blank lines in the file with the help of python?)。但是,空行仍然出现在我的 CSV 文件中。有谁知道问题出在哪里吗?

# Input files; they are read line by line by the filtering loop.
filenames = [
    "8may.json",
    "9may.json",
]
# Map open() over the file names to obtain the handles that get scanned.
open_files = map(open, filenames)

# Keywords to search for. They are compared against lowercased text, so every
# entry must itself be written in lowercase.
## Edit this list to choose your own keywords.
keywords = [
    "test",
    "testing",
    "tester",
]

# Scan every input file line by line and keep the tweets that mention a keyword.
# NOTE: the pattern is applied to the whole lowercased JSON line, not only to
# tweet["text"], so a keyword appearing in any field selects the tweet.

# Build the whole-word pattern once instead of on every line; re.escape keeps
# keywords that contain regex metacharacters literal.
keyword_pattern = re.compile(
    r'\b(%s)\b' % '|'.join(re.escape(keyword) for keyword in keywords)
)

for tweet_file in open_files:
    # The with-block closes each file as soon as it has been read.
    with tweet_file:
        for line in tweet_file:
            # Skip blank and whitespace-only lines.
            if not line.rstrip():
                continue
            if not keyword_pattern.search(str(line).lower()):
                continue
            try:
                tweets.append(json.loads(line))
            except ValueError:
                # The line is not valid JSON (json.loads raises ValueError):
                # skip just this line and keep going.
                continue

# Split every matched tweet into parallel per-column lists; each list receives
# exactly one entry per tweet so they can be zipped back into rows.
for tweet in tweets:
    ids.append(tweet["id_str"])
    texts.append(tweet["text"])
    time_created.append(tweet["created_at"])
    retweet_counts.append(tweet["retweet_count"])
    in_reply_to_screen_name.append(tweet["in_reply_to_screen_name"])
    geos.append(tweet["geo"])
    coordinates.append(tweet["coordinates"])
    places.append(tweet["place"])
    # When there is no usable place data, store the string "None" instead:
    # subscripting a None place raises TypeError, a place without "country"
    # raises KeyError.
    try:
        places_country.append(tweet["place"]["country"])
    except (TypeError, KeyError):
        places_country.append("None")
    lang.append(tweet["lang"])
    user = tweet["user"]
    user_screen_names.append(user["screen_name"])
    user_followers_count.append(user["followers_count"])
    user_friends_count.append(user["friends_count"])
    user_statuses_count.append(user["statuses_count"])
    # Bug fix: this column used to repeat statuses_count. It now holds the
    # user's location; .get() yields None when the field is absent so a
    # record that was accepted before does not start failing here.
    user_locations.append(user.get("location"))

# Header line; the column order matches the tuples assembled in `rows`.
print >> out, "ids,text,time_created,retweet_counts,in_reply_to,geos,coordinates,places,country,language,screen_name,followers,friends,statuses,locations"

# Recombine the parallel per-column lists into one tuple per tweet.
rows = zip(
    ids,
    texts,
    time_created,
    retweet_counts,
    in_reply_to_screen_name,
    geos,
    coordinates,
    places,
    places_country,
    lang,
    user_screen_names,
    user_followers_count,
    user_friends_count,
    user_statuses_count,
    user_locations,
)

# NOTE(review): `out` is opened outside this section, and `writer` is
# presumably csv.writer. On Python 2 the csv module expects a file opened in
# binary mode ('wb'); a text-mode file on Windows produces a blank line after
# every row -- confirm how `out` is opened.
csv = writer(out)

for row in rows:
    values = []
    for value in row:
        # Anything exposing .encode() is written as UTF-8; every other value
        # is passed to the writer untouched.
        if hasattr(value, 'encode'):
            values.append(value.encode('utf8'))
        else:
            values.append(value)
    csv.writerow(values)

out.close()

0 个答案:

没有答案