#-*- coding:utf-8 -*-
import tweepy as tp
import datetime
import json
import sys
import time
import numpy as np
consumer_key='ThnJCL5MrFjUrEG...rqd3J'
consumer_secret='6u13y5wzZGUi8pNfvgjh...1strtxXAV8k3zXl9l76GZ5o'
access_token_key='1441391030-O3Sqn....n0aTqrrETtIVAwFzk03mP'
access_token_secret='AKRD....Xtyoj4BZEr4Ytm'
#创建认证对象
auth = tp.OAuthHandler(consumer_key,consumer_secret)
#设置token和密码
auth.set_access_token(access_token_key,access_token_secret)
api=tp.API(auth)
class MyStreamListener(tp.StreamListener):
#def __init__(self,time_limit=3600):
#self.start_time = time.time()
def on_status(self, status):
print(status.text)
#reference:https://marcobonzanini.com/2015/03/02/mining-twitter-data-with-python-part-1/
def on_data(self, data):
try:
with open('20181101', 'a') as f:
f.write(data)
return True
except BaseException as e:
print("Error on_data: %s" % str(e))
return True
def on_error(self, status):
print(status)
return True
myStreamListener = MyStreamListener()
myStream = tp.Stream(auth=api.auth,listener=myStreamListener)
myStream.filter(track = ['glasgow'])
tweet = json.loads(open(20181101).read())
tweet_texts = [tweet['text'] for tweet in tweet]
tweet_source = [tweet['source'] for tweet in tweet]
tweet_geo = [tweet['geo'] for tweet in tweet]
tweet_location = [tweet['location'] for tweet in tweet]
hashtags = [hashtag['text'] for tweet in tweet for hashtag in tweet['entities']['hashtags']]
print(tweet_texts)
print(tweet_geo)
print(tweet_location)
print(hashtags)
每一次,当我想打印tweet_texts时,都会显示错误:
文件“”,第1行,在 tweet = json.loads(open(20181101).read())
TypeError:强制转换为Unicode:需要字符串或缓冲区,找到int。 这是从tweepy获得来自Twitter的推文的信息流,对此我很陌生。 首先谢谢