我想抓取Twitter API来检索特定用户的关注者ID,以便我可以映射他们的连接。
当我运行下面的代码时,每个用户的followerIds
都是相同的,这可能不对:
try:
import json
except ImportError:
import simplejson as json
import urllib2
import urllib
import codecs
import time
import datetime
import os
import random
import time
import tweepy
from tweepy.parsers import RawParser
import sys
fhLog = codecs.open("LOG.txt",'a','UTF-8')
def logPrint(s):
fhLog.write("%s\n"%s)
print s
#List of screennames of users whose followers we want to get
users =["_AReichert",
"_CindyWallace_",
"_MahmoudAbdelal",
"1939Ford9N",
"1FAMILY2MAN",
"8Amber8",
"AboutTeaching",
"AcamorAcademy",
"acraftymom",
"ActivNews",
"ActuVideosPub",
"ad_jonez",
"adamsteaching",
"ADHD_HELP",
"AIHEHistory",
"ajpodchaski",
"ak2mn",
"AkaMsCrowley",
"AlanAwstyn",
"albertateachers"]
# == OAuth Authentication ==
# The consumer keys can be found on your application's Details
# page located at https://dev.twitter.com/apps (under "OAuth settings")
consumer_key=""
consumer_secret=""
# After the step above, you will be redirected to your app's page.
# Create an access token under the the "Your access token" section
access_token=""
access_token_secret=""
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
rawParser = RawParser()
api = tweepy.API(auth_handler=auth, parser=rawParser)
#Will store ids of followers for each user in the user_output directory
os.system("mkdir -p user_output") #Create directory if it does not exist
userCnt=0
fhOverall=None
for user in users:
userCnt+=1
print("Getting user %s of %s"%(userCnt,len(users)))
count=1
nCursor=-1#First page
while count>0:
id_str=user
try:
fh=open("user_output/"+str(id_str)+"_" + str(count) + ".json","r")
result=fh.read()
fh.close()
wait=0
except:
result=api.followers_ids(count=5000,user_id=id_str,cursor=nCursor)
fh=open("user_output/"+str(id_str)+"_" + str(count) + ".json","w")
fh.write(result)
fh.close()
wait=60
result=json.loads(result)
nCursor=result["next_cursor_str"]
if nCursor=="0":
count=-1
nCursor=None
else:
count+=1
print("Another page to get")
time.sleep(wait)
logPrint("\nDONE! Completed Successfully")
fhLog.close()
如何解决此问题?
答案 0 :(得分:0)
这可能无法解答您的问题,但您的导入中存在缩进问题... 试试这个:
try:
import json
except ImportError:
import simplejson as json
import urllib2
import urllib
import codecs
import time
import datetime
import os
import random
import time
import tweepy
from tweepy.parsers import RawParser
import sys
此外,您可以直接使用os模块创建目录。试试这个:
if not os.path.exists("./user_output"):
os.path.makedirs("./user_output")
最后,你做一个time.sleep(等待)但是可能没有设置等待。试试这个:
if api.followers_ids(count=5000,user_id=id_str,cursor=nCursor):
time.sleep(60)
答案 1 :(得分:0)
tweepy的文档表明api.followers_ids接受的唯一参数是id,user_id或screen_name,而不是您传递的三个参数。
http://pythonhosted.org/tweepy/html/api.html#api-reference
您还需要将返回的值分配给结果变量。摆脱if语句并将其放在原位。
result=api.followers_ids(id_str)
wait=60