我是python和这里的新手。只是在努力收集我需要的数据。
我试图从使用tweepy和python 3.7的用户那里获取推文数据,但是我无法获取作为媒体类型的数据。
我也尝试过使用extended_entities,但是由于“ AttributeError:'状态'对象没有属性'extended_entities'”而出现错误
代码底部是我尝试的。
import tweepy # https://github.com/tweepy/tweepy
import csv
from urllib.request import urlopen
# Twitter API credentials
consumer_key = ""
consumer_secret = ""
access_key = ""
access_secret = ""
# initialize a list to hold all the tweepy Tweets
def redirect(url):
page = urlopen(url)
return page.geturl()
def get_all_tweets(screen_name):
# Twitter only allows access to a users most recent 3240 tweets with this method
# authorize twitter, initialize tweepy
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_key, access_secret)
api = tweepy.API(auth)
# initialize a list to hold all the tweepy Tweets
alltweets = []
# make initial request for most recent tweets (200 is the maximum allowed count)
new_tweets = api.user_timeline(screen_name=screen_name, count=1, tweet_mode='extended',include_entities=True)
# save most recent tweets
alltweets.extend(new_tweets)
# save the id of the oldest tweet less one
oldest = alltweets[-1].id - 1
# keep grabbing tweets until there are no tweets left to grab
while len(new_tweets) > 0:
print("getting tweets before %s" % (oldest))
# all subsequent requests use the max_id param to prevent duplicates
new_tweets = api.user_timeline(screen_name=screen_name, count=200, max_id=oldest, tweet_mode='extended',include_entities=True)
# save most recent tweets
alltweets.extend(new_tweets)
# update the id of the oldest tweet less one
oldest = alltweets[-1].id - 1
print("...%s tweets downloaded so far" % (len(alltweets)))
outtweets = [] # initialize master list to hold our ready tweets
for tweet in alltweets:
try :
outtweets.append([tweet.created_at, tweet.full_text.encode("utf-8"), tweet.retweet_count, tweet.favorite_count, tweet.entities['hashtags'],tweet.extended_entities['media'][0]['type']])
except (NameError, KeyError,UnicodeEncodeError):
pass
with open('%s_416.csv' % screen_name, 'w') as f:
writer = csv.writer(f)
writer.writerow(["created_at","full_text","retweet_count","favorite_count", "hashtags", "media"])
writer.writerows(outtweets)
pass
if __name__ == '__main__':
# pass in the username of the account you want to download
username = ["username"]
for x in username:
get_all_tweets(x)
有人知道其他只显示媒体对象类型的方法吗?