无法看到 Python 多进程(multiprocessing)脚本内部调试打印语句的输出

时间:2019-02-06 04:24:51

标签: python multiprocessing

在下面的代码中,我试图打印出多个 URL(本示例中使用的是虚拟 URL)的页面内容,但看不到任何打印语句的输出。这使得调试我的程序变得困难。有解决方法吗?

我已经尝试过设置日志级别,并在 Python 中启用详细(verbose)模式,但效果不大:我可以看到子进程的日志,但其中有用的信息不多。

import datetime
import urllib.request
from urllib.request import Request, urlopen
import os
import contextlib
import selenium.webdriver as webdriver
import lxml.html as LH
import lxml.html.clean as clean
import time
from datetime import timedelta
import threading
from multiprocessing import Pool, cpu_count
import logging
import multiprocessing
import sys

# Wall-clock reference point; the elapsed-time report later in the script
# subtracts this value from a second time.time() reading.
start_time = time.time()

# HTML tag names; not referenced by any other code visible in this file.
ignore_tags = ('script', 'noscript', 'style')

# Pages to fetch (placeholder test-site URLs).
urls = [
    "https://www.webscraper.io/test-sites/e-commerce/allinone/computers",
    "https://www.webscraper.io/test-sites/e-commerce/allinone/computers/laptops",
    "https://www.webscraper.io/test-sites/e-commerce/allinone/phones/touch",
]

# Every valid position in `urls`; slices of this list are handed to workers.
allind = list(range(len(urls)))

def get_links(inds):
    """Fetch each indexed URL with headless Chrome and print its cleaned HTML.

    Args:
        inds: Iterable of integer positions into the module-level ``urls``
            list.

    Returns:
        None. The URL and the cleaned page source are written to stdout.

    Any exception raised while handling one URL is printed and the loop
    continues with the next index (best-effort behaviour).
    """
    for ind in inds:
        # Reset per iteration so the cleanup below never touches a driver
        # that failed to start (or one left over from a previous URL).
        driver = None
        try:
            url = urls[ind]
            # flush=True: stdout of a worker process is often block-buffered,
            # so unflushed output may never show up while debugging.
            print(url, flush=True)
            url = url.replace("\n", "")
            options = webdriver.ChromeOptions()
            options.add_argument("headless")
            options.add_argument("--no-sandbox")
            # NOTE(review): newer Selenium releases rename/remove the
            # `executable_path` and `chrome_options` keywords - confirm
            # against the installed version.
            driver = webdriver.Chrome(executable_path="/PATH/chromedriver", chrome_options=options)
            driver.get(url)
            content = driver.page_source
            cleaner = clean.Cleaner()
            content = cleaner.clean_html(content)
            print(content, flush=True)
        except Exception as e:
            print(e, flush=True)
        finally:
            # Always release the browser, but only if one was actually created.
            if driver is not None:
                driver.quit()


# Run the fan-out only when executed as a script: with the "spawn" start
# method each worker re-imports this module, and an unguarded Pool() at
# import time would make every worker try to create its own pool.
if __name__ == "__main__":
    # Configure multiprocessing's logger before the pool is created so the
    # worker processes start with it already set up.
    mpl = multiprocessing.log_to_stderr()
    mpl.setLevel(multiprocessing.SUBDEBUG)

    # Leave one core free, but never drop to zero chunks: cpu_count() - 1 is
    # 0 on a single-core machine and would cause a division by zero below.
    ITERATION_COUNT = max(1, cpu_count() - 1)
    print(ITERATION_COUNT)
    count_per_iteration = len(allind) / float(ITERATION_COUNT)

    pending = []
    with Pool() as pool:
        for i in range(0, ITERATION_COUNT):
            print(i)
            # Contiguous slice of indices for chunk i (may be empty when
            # there are more chunks than URLs).
            list_start = int(count_per_iteration * i)
            list_end = int(count_per_iteration * (i+1))
            pending.append(
                pool.apply_async(get_links, [allind[list_start:list_end]])
            )

        # Wait for the workers; without this the parent reaches the end of
        # the script and exits before any worker output is produced.
        pool.close()
        pool.join()

    # apply_async stores worker exceptions in the result object; fetch each
    # result so failures are reported instead of silently dropped.
    for result in pending:
        try:
            result.get()
        except Exception as exc:
            print(exc, flush=True)

    elapsed_time_secs = time.time() - start_time

    msg = "Execution took: %s secs (Wall clock time)" % timedelta(seconds=round(elapsed_time_secs))
    print(msg)

编辑:

借助下面的评论以及进一步的研究,我将代码中的多进程部分改成了下面的代码,并观察到了预期的行为:

if __name__ == '__main__':
    start_time = time.time()
    # Leave one core free, but Pool() rejects a process count below 1
    # (cpu_count() - 1 is 0 on a single-core machine).
    worker_count = max(1, cpu_count() - 1)
    with Pool(worker_count) as p:
        # get_links iterates over its argument, so each task receives a
        # one-element list of indices rather than a bare int. Indices come
        # from allind so every entry of `urls` (including index 0) is covered
        # and none fall outside the list.
        p.map(get_links, [[ind] for ind in allind])
    # map() blocks until every task has finished, so no close()/join() is
    # needed after the with-block has shut the pool down.

0 个答案:

没有答案