所以我一直在使用Tweepy运行我的Python代码来抓取某些电影标题,并使用nohup terminal语句将它们存储在我的PHPmyAdmin数据库中以使其保持运行。前两周代码只破了一次,但自从它破坏了越来越多,现在它已经达到了代码每天停止多次运行的程度。
import tweepy
import time, json, datetime
import pprint as p
import pymysql, sys
from dateutil.parser import parse
from datetime import datetime
import sys
consumer_key = "xxxxxxxxxxxx"
consumer_secret = "xxxxxxxxxxx"
access_token = "xxxxxxxxxxxxx"
access_token_secret = "xxxxxxxxxxx"
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
class StreamListener(tweepy.StreamListener):
def __init__(self):
self.start_time = time.time()
self.count = 0
self.api = api
super(tweepy.StreamListener, self).__init__()
def on_error(self, status_code):
print 'Error: ' + repr(status_code)
return False
def on_data(self, tweet):
tweet = json.loads(tweet)
try:
if tweet['created_at']:
ts = time.strftime('%Y-%m-%d %H:%M:%S', time.strptime(tweet['created_at'],'%a %b %d %H:%M:%S +0000 %Y'))
else:
ts = time.strftime('%Y-%m-%d %H:%M:%S')
data = (tweet['id_str'],tweet['user']['name'],ts ,tweet['in_reply_to_user_id_str'],tweet['in_reply_to_status_id_str'],tweet['favorite_count'],tweet['retweet_count'],tweet['retweeted'],tweet['text'],tweet['lang'],tweet['user']['location'],tweet['user']['time_zone'])
cur.execute("INSERT INTO AllMovieTweets (id_str,user_name,created_at, in_reply_to_user_id_str,in_reply_to_status_id_str,favorite_count,retweet_count,retweeted,text,lang,user_location,time_zone) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)", data)
conn.commit()
except Exception as e:
print "error in stream listener ",e
print data
return True
def on_timeout(self):
return True # Don't kill the stream
try:
conn = pymysql.connect(host='xxxxxxxx', port=xxxx, user='xxxxxx', passwd='xxxxxxxx', db='xxxxxxxxxx', charset='utf8')
cur = conn.cursor()
l = StreamListener()
streamer = tweepy.Stream(auth=auth, listener=l)
setTerms = [ 'graceofmonaco', 'grace of monaco', 'need for speed', 'needforspeed', 'NFSmovie', 'muppets most wanted', 'muppetsmovie2', 'muppetsmostwanted', 'muppets movie', 'muppets film', 'divergent', 'badwords', 'bad words movie', 'bad words film', 'badwordsmovie', 'noah', 'noahmovie', 'a haunted house', 'ahhmovie', 'ahauntedhousemovie', 'ahauntedhouse', 'captain america', 'captainamerica', 'winter soldier', 'wintersoldier', 'domhemingway', 'dom hemingway']
streamer.filter(track = setTerms)
except Exception as e:
print "error",e
sys.exit()
过去一周我大部分时间都犯过的错误是:
流监听器中的错误' created_at' 错误局部变量'数据'在分配前引用
这就是为什么我已经在那里实时添加了try和else语句,因为我认为错误意味着时间格式错误,所以如果我得到它来存储time.strftime然后它不会崩溃但不幸的是没有帮助。有没有人有什么建议导致这个错误?可能是Tweepy的错误还是我的代码中有明显的错误?