我正在尝试使用 Python 的 requests 库进行操作。这是我的代码：
import resource
import sys
import threading
import time

import requests  # Used below (Session, adapters.HTTPAdapter) but was never imported.
# Soft limit on open file descriptors for this process (e.g. 50); used to
# throttle how many worker threads may run at once.
maxOpenFileLimit = resource.getrlimit(resource.RLIMIT_NOFILE)[0]

# One shared requests.Session for every thread.
requestSessions = requests.Session()

# Oversize the connection pool to avoid [Errno -3] when sockets pile up
# in CLOSE_WAIT state.
adapter = requests.adapters.HTTPAdapter(pool_maxsize=maxOpenFileLimit + 100)
for scheme in ('http://', 'https://'):
    requestSessions.mount(scheme, adapter)
def threadAction(a1, a2):
    """Worker task for one (website, slot) pair.

    Sleeps as a placeholder for the real per-thread requests work, then
    bumps the shared completion counter ``number``.
    """
    global number
    time.sleep(1)  # Placeholder for the actual requests work per thread.
    # Original line was `print number = number + 1` — a syntax error.
    # NOTE(review): this read-modify-write of a shared global is not
    # thread-safe; updates can be lost under concurrent workers.
    number = number + 1
    print(number)
number = 0  # Count of completed actions (shared across worker threads).

# `threadLimiter` was never defined in the original snippet (NameError at
# runtime); bind it to the open-file limit it was meant to enforce.
threadLimiter = maxOpenFileLimit

ThreadActions = []  # One Thread per (website, slot) pair.
for i in range(50):  # 50 websites handled in parallel threads.
    # NOTE(review): the original comment said "3 threads" per website,
    # but the code creates 10 — kept at 10 to preserve behavior.
    for n in range(10):
        ThreadActions.append(threading.Thread(target=threadAction, args=(i, n)))

for item in ThreadActions:
    # Throttle thread creation below the open-file limit. Sleep briefly
    # instead of spinning with `continue`: the original busy-wait hogged
    # the CPU and starved the worker threads (the observed stall).
    while threading.activeCount() >= threadLimiter:
        time.sleep(0.01)
    item.start()

for item in ThreadActions:
    item.join()  # Wait for every worker to finish before exiting.
但问题是，在线程数达到 50 之后，Thread limiter（线程限制器）
就开始等待某些线程完成工作。问题就出在这里：脚本进入限制器之后，lsof -i|grep python|wc -l
显示的活动连接少于 50 个；而在进入限制器之前，它显示的进程数最多为 50 个。为什么会这样？或者，我是否应该使用 requests.close() 而不是 requests.session()，来阻止它继续复用那些已经打开的套接字？
答案 0（得分：1）
你的限制器是一个紧凑的循环,占用了你的大部分处理时间。使用线程池来限制工作者的数量。
import multiprocessing.pool
# A single requests.Session shared by all worker threads.
requestSessions = requests.Session()

# Grow the adapter's connection pool past the descriptor limit so sockets
# stuck in CLOSE_WAIT don't trigger [Errno -3].
adapter = requests.adapters.HTTPAdapter(pool_maxsize=maxOpenFileLimit + 100)
for scheme in ('http://', 'https://'):
    requestSessions.mount(scheme, adapter)
# Lock protecting the shared completion counter: a bare `number += 1` is a
# read-modify-write and loses updates under concurrent workers (the issue
# the original DEBUG comment pointed out).
numberLock = threading.Lock()

def threadAction(a1, a2):
    """Worker task for one (website, slot) pair.

    Sleeps as a placeholder for the real per-thread requests work, then
    atomically bumps the shared counter ``number`` and prints it.
    """
    global number
    time.sleep(1)  # Placeholder for the actual requests work per thread.
    # Original line was `print number = number + 1` — a syntax error that
    # also never updated `number`; fixed and made thread-safe.
    with numberLock:
        number += 1
        print(number)
number = 0  # Count of completed actions (shared across worker threads).

# ThreadPool takes only the worker count here: `chunksize` is an argument
# of map()/starmap(), not of the constructor (passing it there raises
# TypeError).
pool = multiprocessing.pool.ThreadPool(50)

ThreadActions = []  # (website, slot) argument pairs, one per task.
for i in range(50):  # 50 websites, 10 tasks each.
    for n in range(10):
        ThreadActions.append((i, n))

# Original called `pool.map(ThreadActons)` — a typo, and map() requires the
# worker function as its first argument. starmap unpacks each (a1, a2) pair
# into threadAction's two parameters.
pool.starmap(threadAction, ThreadActions, chunksize=1)
pool.close()
pool.join()  # Wait for all queued tasks to finish before exiting.