从推文中收集URI

时间:2017-02-08 22:36:14

标签: python twitter output uri tweepy

我目前正在编写一个程序,它使用 Tweepy 和 Twitter API,从 Twitter 上的推文中提取 URI 链接。

这是我目前的代码。如何修改它以便它只从推文中输出URI(如果有的话)?

    #Import the necessary methods from tweepy library
    from tweepy.streaming import StreamListener
    from tweepy import OAuthHandler
    from tweepy import Stream

    # User credentials used to access the Twitter API.
    # The values in this listing are blank/dummy strings; the two access_* values
    # are passed to auth.set_access_token() and the two consumer_* values to
    # OAuthHandler() further down.
    access_token = "-"
    access_token_secret = ""
    consumer_key = ""
    consumer_secret = ""


    #This is a basic listener that just prints received tweets to stdout.
    class StdOutListener(StreamListener):
        """Stream listener that echoes whatever it receives to stdout."""

        def on_data(self, data):
            """Print the raw message exactly as received and return True."""
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(data)
            return True

        def on_error(self, status):
            """Print the error status passed in by the stream."""
            print(status)


    if __name__ == '__main__':

        #This handles Twitter authentication and the connection to Twitter Streaming API
        l = StdOutListener()
        auth = OAuthHandler(consumer_key, consumer_secret)
        auth.set_access_token(access_token, access_token_secret)
        stream = Stream(auth, l)

        #This line filters Twitter Streams to capture data by the keyword: '#NFL'
        twitterator = stream.filter(track=[ '#NFL' ])

        # NOTE(review): this loop assumes stream.filter() returns an iterable of
        # decoded tweets (dicts). With tweepy's listener-based streaming the
        # messages are presumably delivered to StdOutListener.on_data instead,
        # so this loop may never receive anything -- confirm against the tweepy
        # version in use. It is the part the question asks how to fix.
        for tweet in twitterator:
            print("(%s) @%s %s" % (tweet["created_at"], tweet["user"]["screen_name"], tweet["text"]))
            for url in tweet["entities"]["urls"]:
                print(" - found URL: %s" % url["expanded_url"])

1 个答案:

答案 0 :(得分:0)

我已修改您的代码,使其只打印推文中包含的网址:

#Import the necessary methods from tweepy library
import json

from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream

# User credentials used to access the Twitter API.
# The values in this listing are blank/dummy strings; the two access_* values
# are passed to auth.set_access_token() and the two consumer_* values to
# OAuthHandler() in the __main__ section below.
access_token = "-"
access_token_secret = ""
consumer_key = ""
consumer_secret = ""


#This is a basic listener that prints only the URLs found in received tweets.
class StdOutListener(StreamListener):
    """Stream listener that prints the expanded URLs carried by each message."""

    def on_data(self, data):
        """Decode one raw JSON message and print each expanded URL it lists.

        ``data`` is the raw JSON text handed to the listener. Messages without
        an ``entities``/``urls`` section produce no output. Always returns True.
        """
        tweet = json.loads(data)
        # Look the nested keys up defensively: a decoded message that lacks
        # "entities" (or has it set to null) would otherwise raise KeyError /
        # AttributeError inside the callback.
        entities = tweet.get("entities") or {}
        for url in entities.get("urls") or []:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(" - found URL: %s" % url["expanded_url"])
        return True

    def on_error(self, status):
        """Print the error status passed to this callback."""
        print(status)


if __name__ == '__main__':
    # Build the listener first, then the OAuth handler from the module-level
    # credentials, and tie both together in a Stream object.
    listener = StdOutListener()
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, listener)

    # Restrict the stream to messages matching the keyword '#NFL'.
    stream.filter(track=['#NFL'])