我从 Stack Overflow 上的许多帖子中了解到,tweepy.streaming.Stream 类的 filter 方法对 track 和 locations 参数使用逻辑 OR。
因此,以下代码将返回来自 location = USA 的推文,或者包含关键字("panthers" 或 "falcon")的推文:
# Illustrative call from the question: passing both `locations` and `track`
# to filter() is reported above to combine them with a logical OR, so this
# yields tweets that are EITHER inside the bounding box OR contain a keyword.
keyWordList = ['panthers', 'falcon']
GEOBOX_USA = [-125, 25.1, -60.5, 49.1]

# Build the listener first so the Stream construction reads on one line.
echoListener = EchoStreamListener(
    api=apiInstance,
    dump_json=args.json,
    numtweets=args.numtweets,
)
streamObj = tweepy.streaming.Stream(oauthObject, echoListener)
streamObj.filter(
    locations=GEOBOX_USA,
    track=keyWordList,
    languages=['en'],
)
此解决方案(How to add a location filter to tweepy module)在 on_status 方法中检查关键字,效果很好;但如果我需要存储整个 JSON 对象,我想我必须使用 on_data。
所以我改用了 on_data(如下面的代码所示),但是收到了以下错误:
File "/Library/Python/2.7/site-packages/tweepy/streaming.py", line 294, in _run
raise exception
KeyError: 'text'
from types import *
import argparse
import io
import json
import sys

import tweepy
class EchoStreamListener(tweepy.StreamListener):
    """Stream listener that echoes tweets whose text mentions a keyword.

    The stream itself is filtered by location only; keyword filtering is done
    here on the raw JSON so the complete tweet payload can be stored.

    The listener reads two module-level names that the script must define
    before the stream is started:

    * ``keyWordList`` -- iterable of keywords to look for in the tweet text.
    * ``saveFile`` -- text-mode file object that receives one raw JSON tweet
      per line when ``dump_json`` is true.

    Args:
        api: tweepy API instance handed to the base listener.
        dump_json: when true, print and store the full JSON of each matching
            tweet; otherwise print a short ``created_at name text`` line.
        numtweets: stop the stream after this many matching tweets. ``0`` or
            ``None`` means no limit.
    """

    def __init__(self, api, dump_json=False, numtweets=0):
        # super() must name *this* class; naming tweepy.StreamListener skips
        # the base initializer entirely. The base initializer stores `api`.
        super(EchoStreamListener, self).__init__(api)
        self.dump_json = dump_json
        self.count = 0
        # argparse yields None when -n is omitted; treat that as "no limit".
        self.limit = int(numtweets or 0)

    # Earlier on_status-based variant, kept for reference:
    # def on_status(self, status):
    #     if any(keyWord in status.text.lower() for keyWord in keyWordList):
    #         print status.text
    #
    #         self.count+=1
    #         return False if self.count == self.limit else True
    #     else:
    #         return True # Don't kill the stream

    def on_data(self, tweet):
        """Handle one raw stream message.

        Args:
            tweet: raw JSON string received from the stream.

        Returns:
            ``False`` once the configured number of matching tweets has been
            handled (this stops the stream), ``True`` otherwise.
        """
        tweet_data = json.loads(tweet)

        # Not every stream message is a tweet, and non-tweet messages have no
        # 'text' key. Indexing it unconditionally is what raised
        # KeyError: 'text'.
        text = tweet_data.get('text') if isinstance(tweet_data, dict) else None
        if not text:
            return True  # Don't kill the stream

        # Match case-insensitively, as the on_status variant did.
        lowered = text.lower()
        if not any(keyWord.lower() in lowered for keyWord in keyWordList):
            return True

        if self.dump_json:
            print(json.dumps(tweet_data))
            # The file is opened in text mode, so decode raw bytes first
            # (works on both Python 2 str and Python 3 bytes).
            raw = tweet.decode('utf-8') if isinstance(tweet, bytes) else tweet
            saveFile.write(raw + u"\n")
        else:
            # A dict cannot be indexed with a tuple of keys; fetch each field
            # separately. The author's name lives under the 'user' object.
            user = tweet_data.get('user') or {}
            fields = (
                tweet_data.get('created_at') or u'',
                user.get('name') or u'',
                text,
            )
            # Python 2: encode explicitly so non-ASCII text survives stdout.
            print(u' '.join(fields).rstrip().encode('utf-8'))

        # Count every matching tweet so the limit applies in both modes.
        self.count += 1
        return not (self.limit and self.count >= self.limit)

    def on_error(self, status_code):
        """Report a stream error on stderr and keep the stream running."""
        sys.stderr.write(
            'Encountered error with status code: %s\n' % (status_code,)
        )
        return True
def get_parser():
    """Build the command-line parser for the tweet echo script.

    Exactly one of ``-j/--json`` or ``-t/--text`` is required; both write to
    the same ``json`` destination (``True`` for JSON, ``False`` for text).
    ``-n/--numtweets`` is parsed as an integer and defaults to ``0`` so that
    omitting it no longer produces ``None``.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    parser = argparse.ArgumentParser(add_help=True)

    # Output mode: the two flags are mutually exclusive and one is mandatory.
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument(
        '-j', '--json',
        action='store_true',
        help='dump each tweet as a json string',
    )
    group.add_argument(
        '-t', '--text',
        dest='json',
        action='store_false',
        help='dump each tweet\'s text',
    )

    parser.add_argument(
        '-n', '--numtweets',
        metavar='numtweets',
        type=int,   # reject non-numeric values at parse time
        default=0,  # omitted option -> 0 instead of None
        help='set number of tweets to retrieve',
    )
    return parser
if __name__ == '__main__':
    # NOTE(review): myconsumer_key, myconsumer_secret, myaccess_key and
    # myaccess_secret are not defined anywhere in the visible file; they must
    # be supplied (e.g. from a config module) before this script can run.
    oauthObject = tweepy.OAuthHandler(myconsumer_key, myconsumer_secret)
    oauthObject.set_access_token(myaccess_key, myaccess_secret)
    apiInstance = tweepy.API(oauthObject)

    parser = get_parser()
    args = parser.parse_args()

    streamObj = tweepy.streaming.Stream(
        oauthObject,
        EchoStreamListener(
            api=apiInstance,
            dump_json=args.json,
            numtweets=args.numtweets,
        ),
    )

    # Module-level names read by EchoStreamListener.on_data.
    keyWordList = ['panthers', 'falcon']
    GEOBOX_USA = [-125, 25.1, -60.5, 49.1]

    # `with` guarantees the output file is flushed and closed even when the
    # stream raises. saveFile is still bound at module level for the listener.
    with io.open('/Users/deepaktanna/raw_tweets.json', 'w',
                 encoding='utf-8') as saveFile:
        # Only the location filter goes to the API; keywords are matched in
        # the listener.
        streamObj.filter(locations=GEOBOX_USA, languages=['en'])