我需要向 Facebook(FB)批量请求数据,并将结果保存到本地数据库中。
我想使用 ThreadPoolExecutor + asyncio 来并发发出更多请求。
大部分数据已成功处理,但仍有一部分失败。
(约 1.8 万条成功,约 1.9 千条失败)
此处的主要代码:
class PullCampaignDataService(object):
    """Pull campaign data for the ad accounts of every active Facebook user.

    ``run`` fans the ``FbUser`` objects selected with ``is_active=1`` out over
    a thread pool. Each worker lists one user's ad accounts through the
    Facebook SDK, then requests ``settings.FB_GET_CAMPAIGN_URL`` for those
    accounts with aiohttp, ``chunk_size`` accounts at a time, on an event
    loop that is private to that worker call.
    """

    def __init__(self):
        # Users to process; run() submits one worker task per user.
        self.fb_user_list = FbUser.objects.filter(is_active=1)
        # Number of threads in the pool created by run().
        self.max_worker = 5
        # Number of ad accounts requested concurrently in one batch.
        self.chunk_size = 20

    def run(self):
        """Process every user in ``fb_user_list`` on a thread pool.

        Blocks until all workers have finished. An exception raised by a
        worker does not propagate to the caller; it is logged instead.
        """
        with ThreadPoolExecutor(max_workers=self.max_worker) as executor:
            futures = [
                executor.submit(self._get_accounts_by_fb_user, fb_user)
                for fb_user in self.fb_user_list
            ]
            for future in futures:
                # exception() waits for the worker to finish. Without this
                # check a failing worker would leave no trace at all, because
                # nothing ever looked at the results.
                error = future.exception()
                if error is not None:
                    info_logger.error("pull campaign data worker failed: %r", error)

    def _get_accounts_by_fb_user(self, fb_user):
        """List ``fb_user``'s ad accounts and pull the campaigns of each one.

        Runs inside a pool thread. The accounts are requested in batches of
        ``chunk_size``; the requests of one batch run concurrently and the
        batches run one after another.

        Args:
            fb_user: object exposing an ``access_token`` attribute.
        """
        my_api = FacebookAdsApi(
            FacebookSession(settings.FB_APP_ID, settings.FB_APP_SECRET, fb_user.access_token)
        )
        me = User(fbid='me', api=my_api)
        ad_accounts = [
            {'account_id': str(account['account_id']), "access_token": fb_user.access_token}
            for account in me.get_ad_accounts(
                fields=[AdAccount.Field.id, AdAccount.Field.account_id]
            )
        ]
        if not ad_accounts:
            # Nothing to request, so do not create an event loop at all.
            return

        async def pull_all_batches():
            # One session for all of this user's requests, so connections to
            # the same host can be reused instead of re-established for
            # every single request.
            async with aiohttp.ClientSession() as session:
                for start in range(0, len(ad_accounts), self.chunk_size):
                    batch = ad_accounts[start:start + self.chunk_size]
                    await asyncio.gather(*[
                        self._handle_account_campaign(ad_account, session=session)
                        for ad_account in batch
                    ])

        # Pool threads have no event loop of their own, so each call gets a
        # fresh one and must also dispose of it.
        new_loop = asyncio.new_event_loop()
        asyncio.set_event_loop(new_loop)
        try:
            new_loop.run_until_complete(pull_all_batches())
        finally:
            # Close the loop so its selector and file descriptors are
            # released; otherwise every processed user leaks one loop.
            asyncio.set_event_loop(None)
            new_loop.close()

    async def _handle_account_campaign(self, ad_account, session=None):
        """Request the campaign URL for one ad account and decode the JSON body.

        Args:
            ad_account: dict with the keys ``'account_id'`` and
                ``'access_token'``.
            session: optional ``aiohttp.ClientSession`` to send the request
                with. When omitted, a session is created for this single
                request and closed afterwards.

        A ``ClientConnectionError`` is logged and swallowed, so one
        unreachable request does not abort the other requests of the batch.
        """
        url = settings.FB_GET_CAMPAIGN_URL.format(
            account_id=ad_account['account_id'], access_token=ad_account['access_token']
        )
        owns_session = session is None
        if owns_session:
            session = aiohttp.ClientSession()
        try:
            async with session.get(url) as response:
                result = json.loads(await response.text())
                # ..
        except aiohttp.client_exceptions.ClientConnectionError as E:
            info_logger.info("bad request connect error: " + str(E))
        finally:
            # Only close a session created here; a shared one belongs to
            # the caller.
            if owns_session:
                await session.close()
我发现所有的错误都是 Cannot connect to host graph.facebook.com。
错误日志文件:
# 'format': '%(asctime)s %(levelname)s %(module)s %(thread)d %(message)s'
...
2019-04-03 16:58:23,012 INFO pull_campaign_data 336704 bad request connect error: Cannot connect to host graph.facebook.com: 443 ssl:True [None]
2019-04-03 16:58:23,015 INFO pull_campaign_data 336704 bad request connect error: Cannot connect to host graph.facebook.com: 443 ssl:True [None]
2019-04-03 16:58:23,027 INFO pull_campaign_data 336704 bad request connect error: Cannot connect to host graph.facebook.com: 443 ssl:True [None]
...
我发现错误出现在 3 个不同的线程中,但绝大多数失败都集中在同一个线程 thread_336704 上:
# 1903 fail total
thread_6244480 1 failed
thread_846848 22 failed
thread_336704 1880 failed
我不知道如何解决这个问题。它与我使用多线程和 asyncio 的方式有关吗?
我已经在 Stack Overflow 和 Google 上搜索过,但没有找到任何解决方法。