我想在twitter搜索API上进行多进程处理。我有下面的代码,但它每次只顺序执行1次调用,而不是并行执行多次。
from multiprocessing import Process
from twitter import *
# Load OAuth credentials by executing config.py into a fresh namespace.
# NOTE(review): exec() of a config file runs arbitrary code — config.py must
# be trusted; a JSON/INI file would be safer.
config = {}
# BUG FIX: the original open(...) handle was never closed; use `with`.
with open("config.py", "rb") as cfg_file:
    exec(compile(cfg_file.read(), "config.py", 'exec'), config)

# Authenticated Twitter API client shared by all searches.
twitter = Twitter(
    auth=OAuth(config["access_key"], config["access_secret"],
               config["consumer_key"], config["consumer_secret"]))
def twitterSearch(word):
    """Run a single Twitter search for *word* (up to 100 results) and print the raw response."""
    results = twitter.search.tweets(q=word, count=100)
    print(results)
if __name__ == '__main__':
    # BUG FIX: the original called p.join() inside the start loop, so each
    # process had to finish before the next one started — that is why only
    # one search ran at a time. Start ALL workers first, then join them all.
    workers = [Process(target=twitterSearch, args=('racist',)) for _ in range(8)]
    for p in workers:
        p.start()
    for p in workers:
        p.join()
请帮我解决这个问题。
答案 0(得分:0)
如果我理解正确,您想要的是为搜索词提供连续的结果流。我不清楚您正在使用的库,但我知道twython和tweepy都可以使用Twitter的流式API。
在任何情况下,您都需要处理流中出现的每条推文,如果需要,您可以在该阶段使用流程/线程。
流式处理的代码示例:
from threading import Thread
from queue import Queue
from twython import TwythonStreamer
from requests.exceptions import ChunkedEncodingError
# Twitter API credentials — replace these placeholders with your own
# application's keys before running.
CONSUMER_KEY = 'AAA'
CONSUMER_SECRET = 'BBB'
ACCESS_KEY = 'CCC'
ACCESS_SECRET = 'DDD'
class TwitterStream(TwythonStreamer):
    """Twython streaming client that forwards every complete tweet into a queue."""

    def __init__(self, consumer_key, consumer_secret, token, token_secret, tqueue):
        # Stash the output queue before handing control to the parent class.
        self.tweet_queue = tqueue
        super(TwitterStream, self).__init__(consumer_key, consumer_secret, token, token_secret)

    def on_success(self, data):
        # Keep-alive / control messages carry no 'text' key — skip them.
        if 'text' not in data:
            return
        self.tweet_queue.put(data)

    def on_error(self, status_code, data):
        # Errors are deliberately swallowed so the stream keeps running.
        #print(status_code)
        #with open(logfile,'a') as f:
        #    f.write(time.asctime(time.gmtime()) + ' ' + status_code + '\n')
        # Want to stop trying to get data because of the error?
        # Uncomment the next line!
        # self.disconnect()
        pass
def stream_tweets(tweets_queue, track):
    """Open a filtered Twitter streaming connection, feeding tweets into *tweets_queue*.

    Restarts itself on ChunkedEncodingError, which the current requests
    library sometimes raises when the API sends one byte less than expected.
    """
    # Input your credentials below
    consumer_key = CONSUMER_KEY
    consumer_secret = CONSUMER_SECRET
    token = ACCESS_KEY
    token_secret = ACCESS_SECRET
    try:
        stream = TwitterStream(consumer_key, consumer_secret, token, token_secret, tweets_queue)
        stream.statuses.filter(track=track)
    except ChunkedEncodingError:
        # BUG FIX: the original retried with `stream_tweets(tweet_queue)` —
        # the wrong (global) name, and the required `track` argument was
        # missing, so every retry failed. Retry with the same arguments.
        stream_tweets(tweets_queue, track)
def process_tweets(tweets_queue, reply_dict, api, logfile):
    """Consume tweets from *tweets_queue* forever.

    reply_dict, api and logfile are accepted for the caller's use but are
    not touched by this skeleton implementation.
    """
    while True:
        tweet = tweets_queue.get()
        # Do something with `tweet` here — e.g. hand it off to a worker
        # thread for actual processing.
        tweets_queue.task_done()
# --- Script entry point -------------------------------------------------
tweet_queue = Queue()
track = 'whatever you want to filter by'  # Search terms go here

# BUG FIX: the original passed reply_dict/api/logfile to process_tweets()
# without ever defining them, which raised NameError immediately. Define
# placeholders here; fill them in for real use.
reply_dict = {}
api = None
logfile = 'stream_errors.log'

# The producer runs as a daemon thread so the process can still exit
# (e.g. on Ctrl-C) while the stream is open.
Thread(target=stream_tweets,
       args=(tweet_queue, track,),
       daemon=True).start()
process_tweets(tweet_queue, reply_dict, api, logfile)