我有以下 Python 脚本,它使用 Tweepy 获取推文,并通过 PyMongo 将其存入 MongoDB。问题是:每次运行这个脚本,我的 MongoDB 实例都会关闭,脚本也随之报错终止。我不明白为什么会这样——即使脚本终止或 Tweepy 出错,也不应该影响 MongoDB 的运行状态。回溯信息显示,最先触发错误的是这一行:stream.filter(locations=[-119.970703, 48.994636, -109.951172, 59.955010])
脚本:
import tweepy, sys, json, traceback
from bson.json_util import loads as json_to_bson
from hashlib import sha1
from datetime import datetime
from pymongo import MongoClient
from time import sleep, strptime, mktime
# Shared MongoDB client, created with PyMongo's default connection settings
# (no host/port arguments are passed).
client = MongoClient()
# Label written into every saved document under the 'collection_type' key;
# it is None unless something else in the module reassigns it.
mode = None
class Stream(tweepy.StreamListener):
    """Stream listener that stores every status and waits after any error."""

    def on_status(self, data):
        """Pass the received status to ``save``.

        Exceptions raised by ``save`` are not caught here.
        """
        save(data)

    def on_error(self, code):
        """Back off via ``pause`` on any error; ``code`` is not inspected.

        Nothing is returned explicitly, so the result is ``None``.
        """
        pause()
def now():
    """Return the current time formatted as ``YYYY-MM-DD HH:MM:SS``.

    The value comes from ``datetime.now()`` with no timezone argument, so it
    is naive local time.
    """
    # strftime() already yields a string, so no extra str() wrapper is needed.
    return datetime.now().strftime("%Y-%m-%d %H:%M:%S")
def pause(seconds=(60 * 15) + 5):
    """Flush stdout, then block the calling thread for ``seconds`` seconds.

    Args:
        seconds: How long to sleep. Defaults to 905 (15 minutes plus a
            5-second margin), which is the delay that used to be hard-coded,
            so existing ``pause()`` calls behave exactly as before.
    """
    # Push out any buffered output before going quiet for a long time.
    sys.stdout.flush()
    sleep(seconds)
def save(data):
    """Insert one tweet into ``client.grebe.tweets`` unless its text is known.

    The raw payload is round-tripped through JSON into a BSON-compatible
    document, ``created_at`` is rewritten as a ``YYYY-MM-DD HH:MM:SS`` string,
    and ``text_hash``, ``collected_at`` and ``collection_type`` are added.
    The document is inserted only when no stored document has the same
    ``text_hash``.

    Args:
        data: Object exposing the raw tweet payload as ``data._json``
            (presumably a tweepy Status -- confirm against the caller).

    Raises:
        ValueError: If ``created_at`` does not match the expected
            ``"%a %b %d %H:%M:%S +0000 %Y"`` layout.

    Errors raised by the MongoDB client are not caught here and propagate to
    the caller.
    """
    doc = json_to_bson(json.dumps(data._json))

    # Parse straight into a datetime. Routing the value through time.mktime()
    # would interpret this "+0000" timestamp as local time, which can move it
    # by an hour when that wall-clock time falls inside a local DST gap.
    created = datetime.strptime(doc['created_at'], "%a %b %d %H:%M:%S +0000 %Y")
    doc['created_at'] = str(created)

    # SHA-1 of the punycode-encoded text; used below as the duplicate key.
    doc['text_hash'] = sha1(doc['text'].encode('punycode')).hexdigest()
    doc['collected_at'] = now()
    doc['collection_type'] = mode

    tweets = client.grebe.tweets
    # Identity comparison is the correct test for "no matching document".
    if tweets.find_one({'text_hash': doc['text_hash']}) is None:
        tweets.insert_one(doc)
def api():
    """Return a ``tweepy.API`` built from the OAuth credentials defined here."""
    # Placeholder credentials: (consumer key, consumer secret) and
    # (access token key, access token secret).
    consumer_pair = ('key', 'secret')
    access_pair = ('tokenkey', 'tokensecret')

    handler = tweepy.OAuthHandler(*consumer_pair)
    handler.set_access_token(*access_pair)
    return tweepy.API(handler)
def main():
    """Start a location-filtered tweepy stream that feeds a ``Stream`` listener."""
    # Fixed bounding box handed to the stream filter; presumably
    # [west_lon, south_lat, east_lon, north_lat] -- confirm against the
    # streaming API documentation.
    bounding_box = [-119.970703, 48.994636, -109.951172, 59.955010]

    listener = Stream()
    twitter_stream = tweepy.Stream(api().auth, listener)
    twitter_stream.filter(locations=bounding_box)
# Start streaming as soon as this file is executed. There is no
# ``__name__ == "__main__"`` guard, so importing the module also runs it.
main()
错误信息:
File "/usr/local/lib/python2.7/dist-packages/tweepy/streaming.py", line 445, in filter
self._start(async)
File "/usr/local/lib/python2.7/dist-packages/tweepy/streaming.py", line 361, in _start
self._run()
File "/usr/local/lib/python2.7/dist-packages/tweepy/streaming.py", line 294, in _run
raise exception
AutoReconnect: connection closed