Tweepy 流式获取推文的速度非常慢

时间:2018-12-23 11:07:09

标签: python pandas tweepy

我正在尝试使用 Tweepy 和 Python 获取 Twitter 数据。使用 Tweepy 的流式接口(streaming)时,接收推文的速度非常慢。有没有其他库或方法可以更快地获取推文?目前我在 5 分钟内只能读取 20 条推文。我的代码如下:

# Fill in variables: the topic is also used for the output CSV file name.
topic = "GOOGL"
language = "en"

# Build the OAuth handler from the consumer and access credentials
# (consumer_key, consumer_secret, access_token and access_token_secret
# are expected to be defined earlier in the file).
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

# Return API with authentication:
api = tweepy.API(auth, wait_on_rate_limit=True)

# Empty frame that receives one row per collected tweet.
df = pd.DataFrame(
    columns=[
        'Tweets',
        'Date',
        'Likes',
        'RTs',
        'Sentiment',
        'User',
        'Followers',
    ]
)

# Project helpers: a wrapper around the frame and the Twitter utility object.
dfh = dataframeHandler(df)
twh = twitterHandler()

# Lazily-filled cache so the stopword set is built once instead of per tweet.
_STOPWORD_CACHE = {}


def _english_stopwords():
    """Return the English stopword set, building it only on first use."""
    if "english" not in _STOPWORD_CACHE:
        _STOPWORD_CACHE["english"] = set(stopwords.words("english"))
    return _STOPWORD_CACHE["english"]


def preprocess2(raw_text):
    """Normalise a tweet's text for downstream analysis.

    URLs (``http...``) and @mentions are removed, the remainder is
    lower-cased and split on whitespace, and every token found in the
    English stopword set is dropped.

    Args:
        raw_text: The original tweet text.

    Returns:
        The remaining tokens joined by single spaces (an empty string when
        nothing is left).
    """
    stopword_set = _english_stopwords()
    cleaned = re.sub(r"http\S+|@\S+", "", raw_text).lower()
    return " ".join(word for word in cleaned.split() if word not in stopword_set)

class CustomStreamListener(tweepy.StreamListener):
    """Stream listener that stores incoming non-retweet statuses in ``df``.

    Every received status is counted (retweets included). Once the counter
    reaches ``stop_after`` the listener returns False from ``on_status``,
    which ends the stream.
    """

    def __init__(self, stop_after=25):
        """Create the listener.

        Args:
            stop_after: Number of received statuses at which the stream is
                stopped. The default of 25 matches the previously
                hard-coded limit.
        """
        super(CustomStreamListener, self).__init__()
        self.num_tweets = 0
        self.stop_after = stop_after

    def on_status(self, status):
        """Handle one status: clean its text and append a row to ``df``.

        Args:
            status: The status object delivered by the stream.

        Returns:
            False once ``stop_after`` statuses have been received (stops the
            stream), True otherwise.
        """
        self.num_tweets += 1
        if self.num_tweets >= self.stop_after:
            return False

        print("in")
        # Skip retweets; they still count towards the limit above.
        if status.retweeted or 'RT @' in status.text:
            return True

        print(status.text)
        status.text = preprocess2(status.text)
        # Add the tweet to the module-level frame. The 'Date' column holds
        # the tweet's own timestamp, not the author's account creation date.
        df.loc[len(df)] = [
            status.text,
            status.created_at,
            status.favorite_count,
            status.retweet_count,
            twh.get_tweet_sentiment(status.text),
            twh.get_user(status.author),
            twh.get_user_followers(status.user),
        ]
        return True

    def on_error(self, status_code):
        """Report a stream error and decide whether to keep the stream.

        Args:
            status_code: The HTTP status code reported by the stream.

        Returns:
            False for 420 (rate limited) so the stream disconnects instead
            of retrying, True for every other code.
        """
        print("stream error: %s" % status_code)
        # Tweepy's streaming guide recommends disconnecting on 420, because
        # repeated reconnect attempts lengthen the enforced wait.
        return status_code != 420

    def on_timeout(self):
        """Log the timeout and keep the stream alive."""
        print("timeout")
        return True  # Don't kill the stream

# Create the listener once and hand this same instance to the stream.
myStreamListener = CustomStreamListener()

myStream = tweepy.Stream(auth=api.auth, listener=myStreamListener)
# Track the configured topic so the collected tweets match the CSV name below.
myStream.filter(track=[topic])

# Export the collected tweets to "<topic>.csv".
dfh.df.to_csv(topic + ".csv")

0 个答案:

没有答案