如何使用Tweepy API从推文中获取media_url

时间:2016-04-19 07:02:50

标签: python twitter tweepy

我正在使用此代码:

import tweepy
from tweepy.api import API
import urllib
import os

# Module-level counter; on_status uses it to number saved pictures (pic1.jpg, ...).
i = 1

# Twitter application credentials (placeholder values).
consumer_key, consumer_secret = "xx", "xx"
access_token, access_token_secret = "xx", "xx"

# Build the OAuth handler, attach the user's access token, then create the client.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.secure = True
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)

class MyStreamListener(tweepy.StreamListener):
    """Stream listener that downloads the images attached to incoming tweets.

    Each received status is counted in ``self.n``. ``on_status`` returns True
    while fewer than ``self.m`` statuses have been seen and False afterwards
    (presumably this is what makes tweepy close the stream -- confirm against
    the tweepy version in use).
    """

    def __init__(self, api=None):
        self.api = api or API()
        self.n = 0   # number of statuses processed so far
        self.m = 10  # stop after this many statuses

    def on_status(self, status):
        """Save every media item of ``status`` to disk and update the counter.

        Returns True to keep listening, or False once ``self.m`` statuses
        have been handled.
        """
        global i

        # urlretrieve lives in urllib.request on Python 3 and directly in
        # urllib on Python 2; resolve it here so the listener runs on both.
        try:
            from urllib.request import urlretrieve
        except ImportError:
            from urllib import urlretrieve

        if 'media' in status.entities:
            for image in status.entities['media']:
                # Files are numbered with the module-level counter ``i``.
                picName = "pic%s.jpg" % i
                i += 1
                link = image['media_url']
                filename = os.path.join("C:/Users/Charbo/Documents/Python/", picName)
                urlretrieve(link, filename)
                # use to test
                print(status.user.screen_name)
        else:
            print("no media_url")

        self.n = self.n + 1

        if self.n < self.m:
            return True

        print('tweets = ' + str(self.n))
        return False

    def on_error(self, status):
        """Print the error status reported by the stream."""
        print(status)

# Create a single listener and hand that same instance to the stream
# (the original built a second, separate MyStreamListener for the stream
# and left this one unused).
myStreamListener = MyStreamListener()
myStream = tweepy.Stream(auth, myStreamListener, timeout=30)
# Start receiving tweets that match the tracked hashtag.
myStream.filter(track=['#feelthebern'])

我想访问字典中类型为“photo”的媒体条目下的 media_url,但收到了以下错误:'dict' 对象没有属性 'media'。如果有人能帮我理清这个 JSON 的结构,我将不胜感激。

提前致谢!

2 个答案:

答案 0 :(得分:5)

你应该尝试两件事:

  • 在请求中加入实体(设置 include_entities=True):

# Search request that asks for tweet entities to be included in the results.
tweepy.Cursor(
    api.search,
    q="#hashtag",
    count=5,
    include_entities=True,
)
  • 检查推文的实体中是否包含媒体(media):

# Only tweets that carry media have a 'media' key in their entities.
if 'media' in tweet.entities:
    for image in tweet.entities['media']:
        # Placeholder: do something with image['media_url'] here.
        print(image['media_url'])

希望这会有所帮助

答案 1 :(得分:0)

这个回复可能有点晚了,但我相信有一天其他人会发现它很有用。我实际上不想转发任何带有视频的推文。所以我构建了这个函数......并且它完美地工作。

def on_status(self, status):
    """Retweet text-only tweets and photo tweets; skip everything else.

    Ignored without any output: replies, tweets written by ``self.me``,
    tweets already retweeted, and quote tweets. Tweets whose first media
    item is not a photo (video, GIF, ...) are reported and not retweeted.
    After each retweet the function sleeps 40 seconds to avoid rate limits.
    Always returns None.
    """
    # Ignore the tweet when I am the author or when it is a reply to a tweet.
    if status.in_reply_to_status_id is not None or \
            status.user.id == self.me.id:
        return

    # Only retweet tweets not yet retweeted, and never retweet quotes.
    if status.retweeted or status.is_quote_status:
        return

    # Text-only tweet: no "media" entry at all.
    if 'media' not in status.entities:
        try:
            print(status.text)
            status.retweet()
            time.sleep(40)  # Sleep for 40 seconds to avoid limits
        except Exception as e:
            print("Error on_data %s" % str(e))
            print("Error from retweeting")
        return

    # NOTE(review): Twitter's v1.1 payload is believed to report every item in
    # ``entities['media']`` as type 'photo', with the real type (video,
    # animated_gif) only in ``extended_entities`` -- confirm against the API
    # docs. Prefer the extended data when the status carries it and fall
    # back to the plain entities otherwise.
    extended = getattr(status, 'extended_entities', None) or {}
    media_details = extended.get('media') or status.entities['media']
    media_details_kind = media_details[0]

    if media_details_kind['type'] == 'photo':
        try:
            print("It is a photo")
            status.retweet()
            time.sleep(40)
        except Exception as e:
            print("Error on_data %s" % str(e))
            print("Error from retweeting")
    else:
        # Anything else is a video or GIF. Do nothing but report it. This
        # branch was unreachable in the original if/elif/else chain.
        print("Sorry, this might be a video. Cound't retweet because it is neither a photo nor a text")
        print(status.text)