最近我在Python 3.4中编写了一个多进程代码来下载一些图像,它起初工作速度非常快,然后我收到以下错误,无法启动该程序。
Traceback (most recent call last):
File "multiprocessing_d.py", line 23, in <module>
main()
File "multiprocessing_d.py", line 16, in main
p.map(download, lines)
File "/usr/local/lib/python3.4/multiprocessing/pool.py", line 260, in map
return self._map_async(func, iterable, mapstar, chunksize).get()
File "/usr/local/lib/python3.4/multiprocessing/pool.py", line 608, in get
raise self._value
multiprocessing.pool.MaybeEncodingError: Error sending result: '<multiprocessing.pool.ExceptionWithTraceback object at 0x7f1e047f32e8>'. Reason: 'TypeError("cannot serialize '_io.BufferedReader' object",)'
我的代码如下 download_helper.py
import sys
import os
from pathlib import Path
url_prefix = r"Some prefix"
def setup_download_dir(dictionary):
download_dir = Path(dictionary)
if not download_dir.exists():
download_dir.mkdir()
return dictionary
def download_link(dictionary, line):
from urllib.request import urlretrieve
itemid = line.split()[0].decode()
link = line.split()[1].decode()
if (link.startswith("http")):
image_url = link
else:
image_url = url_prefix + link
if os.path.isfile(dictionary + "/" + itemid + ".jpg"):
#print("Already have " + itemid + ".jpg")
pass
else:
urlretrieve(image_url, dictionary + "/" + itemid + ".jpg")
multiprocessing_d.py
from functools import partial
from multiprocessing.pool import Pool
import sys
from time import time
from download_helper import setup_download_dir, download_link
def main():
file_path = sys.argv[1]
dic_path = sys.argv[2]
download_dir = setup_download_dir(dic_path)
download = partial(download_link, download_dir)
with open(file_path, 'rb') as f:
lines = f.readlines()
ts = time()
p = Pool(processes=16, maxtasksperchild=1)
p.map(download, lines)
p.close()
p.join()
print('Took {}s'.format(time() - ts))
f.close()
if __name__ == "__main__":
main()
我试图在线搜索,但没有找到有用的信息。我怀疑是urlretrieve可能会有一些异常,但我不知道如何调试它。任何意见或建议将不胜感激!!
詹姆斯
答案 0 :(得分:-3)
我不是最好的程序员,对此并不了解,但你可以尝试:
from functools import partial
from multiprocessing.pool import Pool
import sys
from time import time
from download_helper import setup_download_dir, download_link
def main():
try:
file_path = sys.argv[1]
dic_path = sys.argv[2]
download_dir = setup_download_dir(dic_path)
download = partial(download_link, download_dir)
with open(file_path, 'rb') as f:
lines = f.readlines()
ts = time()
p = Pool(processes=16, maxtasksperchild=1)
p.map(download, lines)
p.close()
p.join()
print('Took {}s'.format(time() - ts))
f.close()
except:
pass
if __name__ == "__main__":
main()
如果这不起作用,那么除了:之外,使用除了TypeError之外的其他方法。 否则,我不知道,抱歉。
好运