I'm trying to crawl a peer friendship network: I need to pick a given user's five most popular followers and then crawl through each of their follower populations in turn. When I try to fetch the data as a list for the crawl function, I get the errors below.
TypeError: document must be an instance of dict, bson.son.SON, bson.raw_bson.RawBSONDocument, or a type that inherits from collections.MutableMapping

During handling of the above exception, another exception occurred:

pymongo.errors.ServerSelectionTimeoutError: localhost:27017: [Errno 61] Connection refused

Process finished with exit code 1
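From the PyMongo docs, insert_many expects an iterable of documents, while insert_one takes a single dict. My guess is that passing one dict to insert_many makes PyMongo iterate over its keys, which are plain strings, and that is exactly the TypeError above. A minimal sketch of what I think is happening (test_db and test_coll are just placeholder names):

import pymongo

coll = pymongo.MongoClient()['test_db']['test_coll']
doc = {'followers': [1, 2, 3]}

coll.insert_many(doc)  # iterates the dict's keys (strings) -> TypeError, raised client-side
coll.insert_one(doc)   # the right call for a single dict, but it needs a reachable mongod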
Here is my code:
import pymongo
import twitter_network  # my helper module with the Twitter recipes

def save_to_mongo(data, mongo_db, mongo_db_coll, **mongo_conn_kw):
    client = pymongo.MongoClient(**mongo_conn_kw)
    db = client[mongo_db]
    coll = db[mongo_db_coll]
    try:
        # insert_many expects an iterable of documents (a list of dicts)
        return coll.insert_many(data)
    except:
        # fall back to a single-document insert
        return coll.insert_one(data)
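My first thought is to branch on the payload type instead of relying on the bare except. This is only a sketch (save_to_mongo_v2 is a placeholder name, and it assumes data is always either one dict or a list of dicts):

def save_to_mongo_v2(data, mongo_db, mongo_db_coll, **mongo_conn_kw):
    client = pymongo.MongoClient(**mongo_conn_kw)
    coll = client[mongo_db][mongo_db_coll]
    if isinstance(data, list):
        return coll.insert_many(data)  # bulk insert for a list of documents
    return coll.insert_one(data)       # single-document insert for one dict

The rest of my code follows.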
def load_from_mongo(mongo_db, mongo_db_coll, return_cursor=False,
                    criteria=None, projection=None, **mongo_conn_kw):
    client = pymongo.MongoClient(**mongo_conn_kw)
    db = client[mongo_db]
    coll = db[mongo_db_coll]
    if criteria is None:
        criteria = {}
    if projection is None:
        cursor = coll.find(criteria)
    else:
        cursor = coll.find(criteria, projection)
    if return_cursor:
        return cursor
    else:
        return [item for item in cursor]
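For context, I read results back roughly like this (the seed id in the collection name is just an example):

followers = load_from_mongo('followers_crawl', '12345-follower_ids',
                            projection={'followers': 1, '_id': 0})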
def pickFiveMostPopular(users):
    unsortedList_by_follower_count = []
    # profiles come back keyed by the user ids passed in
    p = twitter_network.get_user_profile(twitter_api, screen_names=None, user_ids=users)
    for user in users:
        unsortedList_by_follower_count.append((user, p[user]['followers_count']))
    sortedList_by_follower_count = sorted(unsortedList_by_follower_count,
                                          key=lambda x: x[1], reverse=True)
    top5 = [x[0] for x in sortedList_by_follower_count[:5]]
    print("The five most popular people who " + screen_name +
          " also follows are: " + getScreenName(top5))
    return top5
def crawl_followers(twitter_api, screen_name, limit=1000000, depth=3, **mongo_conn_kw):
    seed_id = str(twitter_api.users.show(screen_name=screen_name)['id'])
    #_, next_queue = twitter_network.get_friends_followers_ids(twitter_api, user_id=seed_id, friends_limit=0, followers_limit=limit)
    next_queue = pickFiveMostPopular(
        twitter_network.get_reciprocal_friends(twitter_api, screen_name=screen_name,
                                               friends_limit=0, followers_limit=limit))
    # this passes a single dict, so save_to_mongo hits insert_many first
    save_to_mongo({'followers': [_id for _id in next_queue]},
                  'followers_crawl', '{0}-follower_ids'.format(seed_id), **mongo_conn_kw)
    d = 1
    while d < depth:
        d += 1
        (queue, next_queue) = (next_queue, [])
        for fid in queue:
            _, follower_ids = twitter_network.get_friends_followers_ids(
                twitter_api, user_id=fid, friends_limit=0, followers_limit=limit)
            save_to_mongo({'followers': [_id for _id in follower_ids]},
                          'followers_crawl', '{0}-follower_ids'.format(fid))
            next_queue += follower_ids
    print("Done crawling!")
Please tell me how to correctly get the data into the save_to_mongo function. Any help is appreciated!
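Also, I suspect the ServerSelectionTimeoutError simply means mongod isn't running (or isn't reachable) on localhost:27017, since even insert_one needs a live server. A quick connectivity check I plan to run first (the 2000 ms timeout is an arbitrary choice):

client = pymongo.MongoClient(serverSelectionTimeoutMS=2000)
client.admin.command('ping')  # raises ServerSelectionTimeoutError if mongod is down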