如何使用tweepy获得关注者数量

时间:2015-04-24 22:42:09

标签: python python-2.7 twitter tweepy

我想获取各家公司的关注者数量,并随着时间的推移持续跟踪。我有超过20万家公司,在当前的API速率限制下,我现有的代码需要运行数年才能跑完。

# Collect every follower id of account `a` by walking the cursor item by item.
c = tweepy.Cursor(api.followers_ids, id=a)
ids = []
for follower_id in c.items():  # named follower_id so the builtin `id` is not shadowed
    time.sleep(0.01)  # brief pause between items
    ids.append(follower_id)

在这段代码中,每获取一个关注者都会消耗一次API调用。我想知道是否有一个函数可以直接以数字形式返回关注者数量?另外,Twitter API的限制具体是什么?

1 个答案:

答案 0 :(得分:1)

每个API请求一次最多返回5000个关注者ID。要检索20万家公司的所有关注者,下面是一个非常有用的脚本,它来自Matthew A. Russell的书《挖掘社交网络》(Mining the Social Web),可以应对Twitter API的速率限制。

为了发出健壮的Twitter请求并访问Twitter的API,Matthew定义了以下方法:

import sys
import time
from urllib2 import URLError
from httplib import BadStatusLine
import json
import twitter

def oauth_login():
    """Build and return a ``twitter.Twitter`` client authenticated via OAuth.

    The four credential strings are empty placeholders in this listing;
    presumably they are meant to be replaced with real application
    credentials before use.
    """
    consumer_key, consumer_secret = '', ''
    oauth_token, oauth_token_secret = '', ''

    # OAuth takes the user token pair first, then the consumer pair.
    credentials = twitter.oauth.OAuth(oauth_token, oauth_token_secret,
                                      consumer_key, consumer_secret)
    return twitter.Twitter(auth=credentials)

def make_twitter_request(twitter_api_func, max_errors=10, *args, **kw):
    """Call ``twitter_api_func(*args, **kw)``, retrying on transient failures.

    Args:
        twitter_api_func: The callable to invoke.
        max_errors: How many consecutive URLError / BadStatusLine failures
            are tolerated before the latest one is re-raised.  Because this
            parameter precedes ``*args``, the first extra positional argument
            binds to it; pass arguments meant for ``twitter_api_func`` as
            keywords to avoid surprises.
        *args, **kw: Forwarded unchanged to ``twitter_api_func``.

    Returns:
        Whatever ``twitter_api_func`` returns, or None when a 401 or 404
        HTTP error is encountered (the caller must handle that case).

    Raises:
        twitter.api.TwitterHTTPError: For HTTP error codes that are not
            handled below, or once the back-off wait exceeds 3600 seconds.
        URLError, BadStatusLine: After more than ``max_errors`` consecutive
            occurrences.
    """

    def handle_twitter_http_error(e, wait_period=2, sleep_when_rate_limited=True):
        """Handle a TwitterHTTPError and return the next wait period.

        Returns an updated wait period for 500-level errors, blocks until the
        rate limit window has passed for a 429 error (then returns 2), and
        returns None for 401 and 404 errors, which require special handling
        by the caller.  Any other code, or a wait period above one hour,
        re-raises ``e``.
        """
        if wait_period > 3600:  # Seconds
            print >> sys.stderr, 'Too many retries. Quitting.'
            raise e

        # See https://dev.twitter.com/docs/error-codes-responses for common codes
        if e.e.code == 401:
            print >> sys.stderr, 'Encountered 401 Error (Not Authorized)'
            return None
        elif e.e.code == 404:
            print >> sys.stderr, 'Encountered 404 Error (Not Found)'
            return None
        elif e.e.code == 429:
            print >> sys.stderr, 'Encountered 429 Error (Rate Limit Exceeded)'
            if sleep_when_rate_limited:
                print >> sys.stderr, "Retrying in 15 minutes...ZzZ..."
                sys.stderr.flush()
                time.sleep(60*15 + 5)
                print >> sys.stderr, '...ZzZ...Awake now and trying again.'
                return 2  # Start the back-off over after the long sleep
            else:
                raise e  # Caller must handle the rate limiting issue
        elif e.e.code in (500, 502, 503, 504):
            print >> sys.stderr, ('Encountered %i Error. Retrying in %i seconds'
                                  % (e.e.code, wait_period))
            time.sleep(wait_period)
            wait_period *= 1.5  # Back off a little longer on each retry
            return wait_period
        else:
            raise e

    # End of nested helper function

    wait_period = 2
    error_count = 0

    while True:
        try:
            return twitter_api_func(*args, **kw)
        except twitter.api.TwitterHTTPError as e:
            # An HTTP-level response breaks any run of connection errors.
            error_count = 0
            wait_period = handle_twitter_http_error(e, wait_period)
            if wait_period is None:
                return
        except URLError:
            error_count += 1
            print >> sys.stderr, "URLError encountered. Continuing."
            if error_count > max_errors:
                print >> sys.stderr, "Too many consecutive errors...bailing out."
                raise
        except BadStatusLine:
            error_count += 1
            print >> sys.stderr, "BadStatusLine encountered. Continuing."
            if error_count > max_errors:
                print >> sys.stderr, "Too many consecutive errors...bailing out."
                raise

这是检索朋友和粉丝的方法:

from functools import partial
from sys import maxint

def get_friends_followers_ids(twitter_api, screen_name=None, user_id=None,
                              friends_limit=maxint, followers_limit=maxint):
    """Fetch friend ids and follower ids for one account, page by page.

    Args:
        twitter_api: Client object exposing ``friends.ids`` and
            ``followers.ids``.
        screen_name: Account screen name; give this or ``user_id``, not both.
        user_id: Account id; give this or ``screen_name``, not both.
        friends_limit: Stop fetching friends once at least this many ids have
            been collected; 0 skips friends entirely.
        followers_limit: Same as ``friends_limit``, for followers.

    Returns:
        A ``(friends_ids, followers_ids)`` tuple of lists, each truncated to
        its limit.

    Raises:
        AssertionError: If neither or both of ``screen_name`` and ``user_id``
            are given.
    """
    # Must have either screen_name or user_id (logical xor)
    assert (screen_name is not None) != (user_id is not None), \
        "Must have screen_name or user_id, but not both"

    # See https://dev.twitter.com/docs/api/1.1/get/friends/ids and
    # https://dev.twitter.com/docs/api/1.1/get/followers/ids for details
    # on API parameters
    get_friends_ids = partial(make_twitter_request, twitter_api.friends.ids,
                              count=5000)
    get_followers_ids = partial(make_twitter_request, twitter_api.followers.ids,
                                count=5000)

    # Identify the account the same way on every request.  Testing against
    # None (rather than truthiness) keeps this consistent with the assertion
    # above, so an empty screen_name is not silently replaced by user_id=None.
    if screen_name is not None:
        account = {'screen_name': screen_name}
    else:
        account = {'user_id': user_id}

    friends_ids, followers_ids = [], []

    for twitter_api_func, limit, ids, label in [
            (get_friends_ids, friends_limit, friends_ids, "friends"),
            (get_followers_ids, followers_limit, followers_ids, "followers"),
    ]:
        if limit == 0:
            continue

        cursor = -1  # Initial cursor value; a next_cursor of 0 ends the loop
        while cursor != 0:
            # Use make_twitter_request via the partially bound callable...
            response = twitter_api_func(cursor=cursor, **account)

            if response is not None:
                # `ids` aliases friends_ids/followers_ids, so extend in place.
                ids.extend(response['ids'])
                cursor = response['next_cursor']

            print >> sys.stderr, 'Fetched {0} total {1} ids for {2}'.format(
                len(ids), label, (user_id or screen_name))

            # XXX: You may want to store data during each iteration to provide
            # an additional layer of protection from exceptional circumstances

            # A None response means the request gave up; stop paging.
            if len(ids) >= limit or response is None:
                break

    # Do something useful with the IDs, like store them to disk...
    return friends_ids[:friends_limit], followers_ids[:followers_limit]

# Sample usage: log in, fetch up to ten friend ids and ten follower ids for
# one account, then print both lists.
twitter_api = oauth_login()

friends_ids, followers_ids = get_friends_followers_ids(
    twitter_api,
    screen_name="SocialWebMining",
    friends_limit=10,
    followers_limit=10,
)

print(friends_ids)
print(followers_ids)