I put together a small multiprocessing example, shown below:
import time
import multiprocessing as mp
import requests
from bs4 import BeautifulSoup as bs
from multiprocessing import Pool

# Shared dict managed by a Manager process, so the collector process
# can append results that the main process reads back at the end.
manager = mp.Manager()
messages_dict = manager.dict()
messages_dict['value'] = ''


def complex_task():
    # Fetch stackoverflow.com, parse the page, then sleep 2 seconds
    # to simulate extra work.
    req = requests.get('https://stackoverflow.com')
    html = req.text
    soup = bs(html, 'html.parser')
    my_titles = soup.select('h3 > a')
    data = []
    for title in my_titles:
        data.append(title.get('href'))
    time.sleep(2)


class abc:
    def all(self):
        complex_task()
        return "This is abc \n what a abc!\n"


class bcd:
    def all(self):
        complex_task()
        return "This is bcd \n what a bcd!\n"


class cde:
    def all(self):
        complex_task()
        return "This is cde \n what a cde!\n"


class ijk:
    def all(self):
        complex_task()
        return "This is ijk \n what a ijk!\n"


def crawler(sites, ps_queue):
    # Producer: push every site object onto the joinable queue.
    for site in sites:
        ps_queue.put(site)


def message_collector(ps_queue):
    # Consumer: pull sites off the queue, run their work, and append
    # the result to the shared dict.
    while True:
        site = ps_queue.get()
        messages_dict['value'] += site.all()
        ps_queue.task_done()


def main():
    site_list = [abc(), bcd(), cde(), ijk()]

    #
    # Single process
    #
    # start_time = time.time()
    # messages = ''
    # for site in site_list:
    #     messages += site.all()
    # print(messages)

    #
    # Multi process
    #
    ps_queue = mp.JoinableQueue()
    message_collector_proc = mp.Process(
        target=message_collector,
        args=(ps_queue, )
    )
    message_collector_proc.daemon = True

    start_time = time.time()
    message_collector_proc.start()
    crawler(site_list, ps_queue)

    ps_queue.join()

    print(messages_dict['value'])
    print(time.time() - start_time)


if __name__ == "__main__":
    main()
But it turns out that the single-process and multi-process versions run at practically the same speed. As you can see in the code above, main() contains two sections: Multi process and Single process.
Runtimes on a Late 2013 MacBook Pro (OS X Sierra):
- Single process: 16.92385721206665
- Multi process: 16.362822771072388
Almost no difference...

I can't figure out why this happens. Any help would be appreciated!
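To make clearer what I mean by running the four site.all() calls in parallel, here is a rough sketch using multiprocessing.Pool. This is not my actual code: the run_site helper and the pool size of 4 are only for illustration, and it assumes the abc/bcd/cde/ijk classes defined above.

# Rough sketch only -- reuses the abc/bcd/cde/ijk classes from the code above.
# run_site() exists so Pool.map has a picklable, module-level callable
# to hand to the worker processes.
from multiprocessing import Pool


def run_site(site):
    return site.all()


if __name__ == "__main__":
    site_list = [abc(), bcd(), cde(), ijk()]
    with Pool(processes=4) as pool:          # one worker per site
        results = pool.map(run_site, site_list)
    print(''.join(results))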