Why does the following code run when using threads, but throw an exception when using multiprocessing?
from multiprocessing import Pool
from multiprocessing.dummy import Pool as ThreadsPool
import urllib2

urls = [
    'http://www.python.org',
    'http://www.python.org/about/',
    'http://www.python.org/doc/',
    'http://www.python.org/download/']

def use_threads():
    pool = ThreadsPool(4)
    results = pool.map(urllib2.urlopen, urls)
    pool.close()
    pool.join()
    print [len(x.read()) for x in results]

def use_procs():
    p_pool = Pool(4)
    p_results = p_pool.map(urllib2.urlopen, urls)
    p_pool.close()
    p_pool.join()
    print 'using procs instead of threads'
    print [len(x.read()) for x in p_results]

if __name__ == '__main__':
    use_procs()
The exception is:
Traceback (most recent call last):
  File "pools.py", line 39, in <module>
    use_procs()
  File "pools.py", line 31, in use_procs
    p_results = p_pool.map(urllib2.urlopen, urls)
  File "/usr/lib64/python2.7/multiprocessing/pool.py", line 250, in map
    return self.map_async(func, iterable, chunksize).get()
  File "/usr/lib64/python2.7/multiprocessing/pool.py", line 554, in get
    raise self._value
multiprocessing.pool.MaybeEncodingError: Error sending result: '[<addinfourl at 35286624 whose fp = <socket._fileobject object at 0x2198ad0>>]'. Reason: 'PicklingError("Can't pickle <type 'instancemethod'>: attribute lookup __builtin__.instancemethod failed",)'
I know that processes and threads communicate differently. Why does pickling the website content fail, and how can I set the encoding to fix this?
Answer 0 (score: 4)
The problem is not an encoding error. It fails because urllib2.urlopen() returns an object that cannot be pickled (when I ran your code the error message named a slightly different culprit, _ssl._SSLSocket). To fix it, limit use of the returned object to the child process itself by reading the data right after opening the URL, as shown below. This may mean that more data has to be passed between the processes.
The modified code:
# Added: read the response inside the worker, so only a picklable string is returned.
def get_data(url):
    soc = urllib2.urlopen(url)
    return soc.read()

def use_procs():
    p_pool = Pool(4)
    # p_results = p_pool.map(urllib2.urlopen, urls)
    p_results = p_pool.map(get_data, urls)
    p_pool.close()
    p_pool.join()
    print 'using procs instead of threads'
    # print [len(x.read()) for x in results]
    print [len(x) for x in p_results]
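To see the failure in isolation, here is a minimal sketch (assuming Python 2 and network access; the exact exception can differ from the one in the traceback above) that pickles the urlopen() result by hand, which is what Pool.map has to do with every value a worker returns:

# Minimal sketch: pickle the urlopen() result by hand, as Pool.map must do
# before sending a worker's return value back to the parent process.
import pickle
import urllib2

response = urllib2.urlopen('http://www.python.org')
try:
    pickle.dumps(response)          # fails: the object wraps a socket and bound methods
except Exception as exc:            # exact exception type may vary
    print 'cannot pickle the response object: %r' % (exc,)

# The string read from it, however, pickles without trouble.
print 'the page body pickles fine: %d bytes pickled' % len(pickle.dumps(response.read()))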
Answer 1 (score: 2)
As I already mentioned, the error is raised because you are trying to pass a socket object between processes. You have to change your script's logic to something like this:
from multiprocessing.pool import Pool
from multiprocessing.pool import ThreadPool
import urllib2

urls = [
    'http://www.python.org',
    'http://www.python.org/about/',
    'http://www.python.org/doc/',
    'http://www.python.org/download/'
]

def worker(url):
    return urllib2.urlopen(url).read()  # string returned

def use_threads():
    pool = ThreadPool(4)
    results = pool.map(worker, urls)
    pool.close()
    pool.join()
    print([len(x) for x in results])

def use_procs():
    p_pool = Pool(4)
    p_results = p_pool.map(worker, urls)
    p_pool.close()
    p_pool.join()
    print('using procs instead of threads')
    print([len(x) for x in p_results])

if __name__ == '__main__':
    use_procs()
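If only the content lengths are needed, a further variation (just a sketch; the content_length helper here is illustrative) is to compute the length inside the worker, so each child process only has to send back a small integer:

# Sketch: count inside the worker so only a trivially picklable integer
# travels back from each child process instead of the whole page body.
def content_length(url):
    return len(urllib2.urlopen(url).read())

def use_procs():
    p_pool = Pool(4)
    lengths = p_pool.map(content_length, urls)
    p_pool.close()
    p_pool.join()
    print('using procs instead of threads')
    print(lengths)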
By the way, instead of duplicating code in use_threads and use_procs, you can write a pool factory and get the pool you need from it:
from multiprocessing.pool import Pool
from multiprocessing.pool import ThreadPool
import urllib2

urls = [
    'http://www.python.org',
    'http://www.python.org/about/',
    'http://www.python.org/doc/',
    'http://www.python.org/download/'
]

def worker(url):
    return urllib2.urlopen(url).read()

def pool_factory(key, n):
    if key == 'proc':
        print('using procs instead of threads')
        return Pool(n)
    else:
        return ThreadPool(n)

def main():
    pool = pool_factory('proc', 4)  # change `proc` to anything for using ThreadPool
    results = pool.map(worker, urls)
    pool.close()
    pool.join()
    print([len(x) for x in results])

if __name__ == '__main__':
    main()
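The same factory idea carries over in a sketch assuming Python 3, where urllib.request.urlopen replaces urllib2.urlopen and both pool classes can be used as context managers:

# Sketch for Python 3: urllib2 no longer exists, so urllib.request is used;
# the factory and the picklable bytes return value stay the same.
from multiprocessing.pool import Pool, ThreadPool
from urllib.request import urlopen

urls = [
    'http://www.python.org',
    'http://www.python.org/about/',
    'http://www.python.org/doc/',
    'http://www.python.org/download/',
]

def worker(url):
    return urlopen(url).read()  # bytes are picklable

def pool_factory(key, n):
    return Pool(n) if key == 'proc' else ThreadPool(n)

def main():
    # 'proc' selects a process pool; anything else selects ThreadPool
    with pool_factory('proc', 4) as pool:
        results = pool.map(worker, urls)
    print([len(x) for x in results])

if __name__ == '__main__':
    main()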