我有以下代码:
import sys
import urllib2
import threading
import time
urls = ["http://www.google.com", "http://www.apple.com"]

# Serializes writes to the shared error log; every worker thread may
# append to it.
error_lock = threading.Lock()


def fetch_url(url):
    """Download ``url`` and print the response body.

    This runs inside a worker thread, so failures have to be handled
    here: an exception raised in a thread never propagates to the code
    that called ``Thread.start()``. A URL that cannot be fetched is
    appended to the error log (the module-level ``error_file``, which is
    opened before any thread is started) instead of being printed.

    Args:
        url: The address to download.
    """
    try:
        html = urllib2.urlopen(url).read()
    except urllib2.URLError:
        # urllib2.HTTPError is a subclass of URLError, so this single
        # clause covers both kinds of failure.
        with error_lock:
            error_file.write(url + "\n")
    else:
        print(html)


# "Output.txt" is opened (and therefore truncated) exactly as before, even
# though nothing is written to it yet. The ``with`` statement guarantees
# both files are closed once every thread has finished.
with open("Output.txt", "w") as output_file, \
        open("ErrorUsers.txt", "w") as error_file:
    # NOTE: this still starts one thread per URL. For thousands of URLs,
    # use a bounded pool of workers instead.
    threads = [threading.Thread(target=fetch_url, args=(url,)) for url in urls]
    for thread in threads:
        # Daemon threads do not keep the interpreter alive on their own.
        thread.daemon = True
        thread.start()
    # Wait for every download before leaving the ``with`` block, so the
    # error log is still open while workers may write to it.
    for thread in threads:
        thread.join()
基本上,我需要从大量网站(数以千计)获取数据。我以上面的代码为基础,它运行良好,但在创建约 750 个线程后,程序会卡死。我想知道如何限制同时活动的线程数量,或者让线程在完成任务后立即关闭。
答案 0 :(得分:1)
您可以使用线程池,例如 multiprocessing 模块中实现的 ThreadPool。在下面的代码片段中,任何时刻最多有 100 个线程处于活动状态。
from multiprocessing.pool import ThreadPool
urls = ["http://www.google.com", "http://www.apple.com"]


def fetch_url(url):
    """Fetch ``url`` and print the complete response body."""
    response = urllib2.urlopen(url)
    body = response.read()
    print(body)


# A pool of 100 worker threads caps how many downloads run at once,
# however many URLs are queued.
pool = ThreadPool(processes=100)
pool.map(fetch_url, urls)  # blocks until every URL has been processed
pool.close()  # no further tasks will be submitted
pool.join()  # wait for the worker threads to exit
答案 1 :(得分:0)
from multiprocessing.pool import ThreadPool
import urllib2
urls = ["http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", 
"http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", 
"http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com","http://www.google.com", "http://www.apple.com"]
def fetch_url(url):
    """Fetch ``url`` and print a short slice of the response body.

    Only ``html[1:10]`` is printed, which keeps the output readable when
    many URLs are fetched.

    Args:
        url: The address to download.
    """
    html = urllib2.urlopen(url).read()
    print(html[1:10])
    # The previous ``pool.TerminateProcess()`` call was removed:
    # ThreadPool has no such method, so every task failed on it. Tasks
    # need no per-call cleanup; the pool is shut down once with
    # close()/join().
# At most 100 worker threads exist at any time; the URLs are distributed
# among them.
pool = ThreadPool(processes=100)
pool.map(fetch_url, urls)  # blocks until all URLs have been handled
pool.close()  # signal that no more work will be queued
pool.join()  # wait for the workers to finish and exit