我正在使用asyncio和aiohttp收集一批请求并异步运行它们。我的数据库中有超过500,000个用户要处理。
我会按100个批次对这500,000个用户进行预处理,因为api可以在一个请求中处理100个用户。
然后,我将同时进行10个请求的分组,总共有919个请求。但是,经过一定时间后,请求挂起/冻结,我注意到我的计算机变慢了。
import asyncio
import aiohttp
import config
from api import TwitterAPI
import motor.motor_asyncio
import itertools
from helper import create_batches, divide_chunks
import time
COUNT = 0
SCREEN_NAME = "MENnewsdesk"
async def fetch_user_objects(apps, session, user_id, tweet_mode):
batch_start = time.time()
global COUNT
client = motor.motor_asyncio.AsyncIOMotorClient('mongodb://localhost:27017')
db = client['twitter']
while True:
if (should_reset(apps) == True):
print("Reseting Apps: Sleep 15 Minutes")
time.sleep(901)
apps = init_twitter_engine()
app = get_available_app(apps)
response = await app.get_users_lookup(session, user_id=user_id, tweet_mode="extended")
if response['status'] != 200:
app.rate_limited = True
continue
for x in response['result']:
await db["MENnewsdesk"].update_one({"user_id": x["id"]}, {"$set": {
"user": x
}})
COUNT += len(response['result'])
batch_end = time.time()
print("APP ID: {app_id}, RATE LIMITED: {app_remaining}, TOTAL USERS: {total_users}, BATCH USERS: {batch_users}, BATCH TIME: {batch_time}"
.format(app_id=app.app_id, app_remaining=app.rate_limited, total_users=COUNT, batch_users=len(response['result']), batch_time=round((batch_end - batch_start), 2)))
return response
async def main():
client = motor.motor_asyncio.AsyncIOMotorClient('mongodb://localhost:27017')
db = client['twitter']
twitter_engine = init_twitter_engine()
async with aiohttp.ClientSession() as session:
users = []
cursor = db[SCREEN_NAME].find(
{"user": {"$exists": False}}, {"user_id": 1, "_id": 0})
async for document in cursor:
users.append(document["user_id"])
user_list = create_batches(users)
print(len(user_list))
batches = list(divide_chunks(user_list, 10))
print(len(batches))
for i, batch in enumerate(batches):
tasks = [asyncio.ensure_future(fetch_user_objects(twitter_engine, session, user_id=users, tweet_mode="extended")) for users in batch]
for t in tasks:
d = await t
loop = asyncio.get_event_loop()
loop.run_until_complete(main())
我不确定这里发生了什么,如何调试此问题?