Using the new asyncio in Python 3.4, how do I grab whichever lock/semaphore out of a group of locks/semaphores becomes available first?
The way I've done it is to use wait(return_when=FIRST_COMPLETED), and then cancel all of the acquire() calls that are still pending once I've managed to get one. But I'm worried that this may cause subtle bugs/race conditions, and I have the feeling there is a more elegant way.
import asyncio as aio

@aio.coroutine
def run():
    sem1, sem2 = (aio.Semaphore(), aio.Semaphore())
    print('initial:', sem1, sem2)

    a = aio.async(sleep(sem1, 1))  # acquire sem1
    print('just after sleep:', sem1, sem2)

    done, pending = yield from aio.wait([sem1.acquire(), sem2.acquire()],
                                        return_when=aio.FIRST_COMPLETED)
    print('done:', done)
    print('pending:', pending)
    for task in pending:
        task.cancel()
    print('after cancel:', sem1, sem2)

    yield from aio.wait([a])
    print('after wait:', sem1, sem2)

@aio.coroutine
def sleep(sem, i):
    with (yield from sem):
        yield from aio.sleep(i)

if __name__ == "__main__":
    aio.get_event_loop().run_until_complete(run())
The code above gives (memory addresses edited out):
initial: <asyncio.locks.Semaphore object at 0x1 [unlocked,value:1]> <asyncio.locks.Semaphore object at 0x2 [unlocked,value:1]>
just after sleep: <asyncio.locks.Semaphore object at 0x1 [unlocked,value:1]> <asyncio.locks.Semaphore object at 0x2 [unlocked,value:1]>
done: {Task(<acquire>)<result=True>}
pending: {Task(<acquire>)<PENDING>}
after cancel: <asyncio.locks.Semaphore object at 0x1 [locked,waiters:1]> <asyncio.locks.Semaphore object at 0x2 [locked]>
after wait: <asyncio.locks.Semaphore object at 0x1 [unlocked,value:1]> <asyncio.locks.Semaphore object at 0x2 [locked]>
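
For reference, a sketch of how the cancel-the-losers approach could be made safer against the race above. It rests on the fact that cancel() is a no-op on a task that has already completed, so an acquire() that finishes between wait() returning and cancel() being called still holds its semaphore and must be released. The acquire_first helper below is invented for illustration, not taken from the original post:

import asyncio as aio

@aio.coroutine
def acquire_first(sems):
    # Race one acquire() per semaphore, each wrapped in a Task so that
    # the losers can be cancelled individually.
    tasks = [aio.async(sem.acquire()) for sem in sems]
    done, pending = yield from aio.wait(tasks, return_when=aio.FIRST_COMPLETED)
    for task in pending:
        task.cancel()
    if pending:
        # Let the cancellations settle. An acquire() may have completed
        # between wait() returning and cancel() being called; cancel()
        # does nothing to a finished task, so that semaphore is held.
        yield from aio.wait(pending)
    winner = None
    for sem, task in zip(sems, tasks):
        if task.cancelled():
            continue  # this acquire() really was cancelled
        if winner is None:
            winner = sem   # keep the first semaphore we actually acquired
        else:
            sem.release()  # hand back any extra semaphore we also acquired
    return winner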
Answer 0 (score: 4):
If I'm understanding your question correctly, you want to have two different pools of locks: one that allows X connections per proxy, and another that allows Y connections globally. This can actually be implemented with a single Semaphore object pretty easily:

    class asyncio.Semaphore(value=1, *, loop=None)

    A semaphore manages an internal counter which is decremented by each acquire() call and incremented by each release() call. The counter can never go below zero; when acquire() finds that it is zero, it blocks, waiting until some other thread calls release().
So, instead of implementing the pool as a list of Semaphore objects each initialized with the default value of 1, just initialize a single Semaphore with a value equal to the maximum number of tasks you want to be able to run concurrently:

proxy_sem = Semaphore(value=5)   # 5 connections will be able to hold this semaphore concurrently
global_sem = Semaphore(value=15) # 15 connections will be able to hold this semaphore

Then in your code, just always acquire the proxy semaphore before acquiring the global one:

with (yield from proxy_sem):
    with (yield from global_sem):

That way you will never be holding the global lock while you wait on a proxy-specific lock, which could block a connection from another proxy that would be free to run if it could get the global lock.
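
To make those fragments concrete, here is a minimal runnable sketch in the same Python 3.4 style; the fetch coroutine, the limits, and the sleep timing are invented for illustration:

import asyncio as aio

@aio.coroutine
def fetch(n, proxy_sem, global_sem):
    # Take a per-proxy slot first, then a global slot, as described above.
    with (yield from proxy_sem):
        with (yield from global_sem):
            print('task', n, 'holds a proxy slot and a global slot')
            yield from aio.sleep(0.1)

@aio.coroutine
def demo():
    proxy_sem = aio.Semaphore(2)   # at most 2 concurrent tasks for this proxy
    global_sem = aio.Semaphore(3)  # at most 3 concurrent tasks overall
    yield from aio.wait([fetch(n, proxy_sem, global_sem) for n in range(6)])

aio.get_event_loop().run_until_complete(demo())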
Edit:
Here's a complete example that demonstrates one way to do this without needing proxy-specific locks at all. Instead, you run one coroutine per proxy, all consuming from the same queue. The proxy coroutines limit the number of concurrent tasks they run by keeping track of the active tasks they've launched, and only launch a new one when they're below the limit. When a proxy coroutine launches a task, that task is responsible for acquiring the global semaphore. Here's the code:
import asyncio
import random

PROXY_CONN_LIMIT = 5
GLOBAL_CONN_LIMIT = 20
PROXIES = ['1.2.3.4', '1.1.1.1', '2.2.2.2', '3.3.3.3', '4.4.4.4']

@asyncio.coroutine
def do_network_stuff(item, proxy_info):
    print("Got {}. Handling it with proxy {}".format(item, proxy_info))
    # Wait a random amount of time to simulate actual work being done.
    yield from asyncio.sleep(random.randint(1, 7))

@asyncio.coroutine
def handle_item(item, proxy_info, global_sem):
    with (yield from global_sem):  # Get the global semaphore
        yield from do_network_stuff(item, proxy_info)

@asyncio.coroutine
def proxy_pool(proxy_info, queue, global_sem):
    tasks = []
    while True:  # Loop infinitely. We'll return when we get a sentinel from main()
        while len(tasks) < PROXY_CONN_LIMIT:  # Pull from the queue until we hit our proxy limit
            item = yield from queue.get()
            if item is None:  # Time to shut down
                if tasks:
                    # Make sure all pending tasks are finished first.
                    yield from asyncio.wait(tasks)
                print("Shutting down {}".format(proxy_info))
                return
            # Create a task for the work item, and add it to our list of
            # active tasks.
            task = asyncio.async(handle_item(item, proxy_info, global_sem))
            tasks.append(task)
        # We've hit our proxy limit. Now we wait for at least one task
        # to complete, then loop around to pull more from the queue.
        done, pending = yield from asyncio.wait(tasks,
                                                return_when=asyncio.FIRST_COMPLETED)
        # Remove the completed tasks from the active tasks list.
        for d in done:
            tasks.remove(d)

@asyncio.coroutine
def main():
    global_sem = asyncio.Semaphore(GLOBAL_CONN_LIMIT)
    queue = asyncio.Queue()
    tasks = []
    # Start the proxy pools.
    for proxy in PROXIES:
        tasks.append(asyncio.async(proxy_pool(proxy, queue, global_sem)))
    # Send work to the proxy pools.
    for i in range(50):
        yield from queue.put(i)
    # Tell the proxy pools to shut down.
    for _ in PROXIES:
        yield from queue.put(None)
    # Wait for them to shut down.
    yield from asyncio.wait(tasks)

if __name__ == "__main__":
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main())