我正在尝试使用python
从用户时间轴获取包含文本“#Gempa”的特定推文我能够获得用户时间表,但我希望获得时间表,文字只包含“#Gempa”或特定文字
这是我的代码
#Import the necessary methods from tweepy library
import tweepy, codecs
import pymysql
import time
#Variables that contains the user credentials to access Twitter API
access_token = "XXX"
access_token_secret = "XXX"
consumer_key = "XXX"
consumer_secret = "XXX"
#Authentication
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
#Declare Connection
conn = pymysql.connect(host='localhost', port='', user='root', passwd='', db='test', use_unicode=True, charset="utf8mb4")
cur = conn.cursor()
#Get Current Date Time
curdatetime = time.strftime("%Y-%m-%d %H:%M:%S")
cur.execute("DELETE FROM tweet order by id desc LIMIT 500")
#Get last id from table tweet
last_id = 0
cur.execute("SELECT MAX(id) FROM tweet")
result = cur.fetchall()
for row in result:
last_id = row[0]
print ("Last ID : " + str(last_id))
#Get Number of Tweet
user = api.get_user(108543358)
print ("Name:", user.name)
print ("Name:", user.screen_name)
print ("Number of tweets: " + str(user.statuses_count))
print ("followers_count: " + str(user.followers_count))
print ("Account location: ", user.location)
print ("Account created at: ", user.created_at)
n = 0
for Tweet in tweepy.Cursor(api.user_timeline, id=108543358, q = "#Gempa", lang = id, result_type = "recent", since_id = last_id).items(3):
print ("*****" + str(i) +"*****")
print ("ID: " + Tweet.id_str)
print ("Text: " + str(Tweet.text.encode("utf-8")))
print ("Retweet Count: " + str(Tweet.retweet_count))
print ("Favorite Count: " + str(Tweet.favorite_count))
print ("Date Time: " + str(Tweet.created_at))
#print (str(Tweet.location)) #how to get geolocation data for mapping ?
print ("************")
n = n + 1
cur.execute("INSERT INTO tweet (no, id, text, retweet_count, favourite_count, date_time) VALUES (%s, %s,%s,%s,%s,%s)",
(str(n), Tweet.id_str, Tweet.text.encode("utf-8"), str(Tweet.retweet_count), str(Tweet.favorite_count), str(Tweet.created_at)))
conn.commit()
cur.close()
conn.close()
,结果是
我无法获得具有特定文字的用户时间表,任何人都可以解决此问题,谢谢
答案 0 :(得分:1)
首先,没有" q"," lang"," result_type" API.user_timeline的参数(阅读http://docs.tweepy.org/en/v3.5.0/api.html#API.user_timeline)
所以要忽略一些推文,你必须编写一个过滤器。你可以跳过不包含"#Gempa"像这样:
for Tweet in api.user_timeline(user_id=108543358):
text = str(Tweet.text.encode("utf-8"))
if "#Gempa" not in text:
continue
print ("*****" + str(n) +"*****")
print ("ID: " + Tweet.id_str)
...