我想对从twitter生成的json文件进行一些数据分析。 我需要的字段是包含推文本身的文本字段。
您的贡献受到高度赞赏
while tweetCount < maxTweets:
try:
if (max_id <= 0):
if (not sinceId):
new_tweets = api.search(q=search_query, count=tw_block_size)
else:
new_tweets = api.search(q=search_query, count=tw_block_size, since_id=sinceId)
else:
if (not sinceId):
new_tweets = api.search(q=search_query, count=tw_block_size, max_id=str(max_id - 1))
else:
new_tweets = api.search(q=search_query, count=tw_block_size, max_id=str(max_id - 1), since_id=sinceId)
if not new_tweets:
print("Collecte terminee.")
break
for tweet in new_tweets:
day = tweet.created_at.strftime('%Y-%m-%d')
with open( "%s/%s_tweets.json" % (output_dir, day), 'a') as f:
f.write(json.dumps(tweet._json))
f.write('\n')
tweetCount += len(new_tweets)
print("{0} tweets téléchargés".format(tweetCount))
max_id = new_tweets[-1].id
except tweepy.TweepError as e:
print("an error was occured to continue , run the following command:")
print("python collect.py -s {0} -o {1} -u {2}".format(search_query, output_dir, max_id))
print("")
print("Error : " + str(e))
break