无法在不同的线程中运行r.html.render(),运行时错误此事件循环已在运行,如何解决?

时间:2019-07-04 08:58:09

标签: python multithreading python-requests

无法在不同的线程中运行r.hml.render()

  

运行时错误,此事件循环已在运行,

如何解决?

from bs4 import BeautifulSoup
import requests_html
import multiprocessing
from concurrent.futures import ProcessPoolExecutor
import threading
import os
import xlwt
import time


session = requests_html.HTMLSession()
session.browser

links = []

def parse_it(sending_chunck2):
    global links
    for i in sending_chunck2:
        r = session.get(i)
        r.html.render()
        soup = BeautifulSoup(r.html.html(), 'lxml')
        sp = soup.find_all('a')
        links.append(sp)
        print(links)
        if len(links) >= 10:
            wr_to_xs = multiprocessing.Process(target=write_func, args=(links,))
            wr_to_xs.close()
            wr_to_xs.join()
        else:
            print('not')

def parse_func(clean_url):
    urls_len = len(clean_url) // 4
    if urls_len % 2 != 0 or urls_len == 0:
        urls_len += 1
    if len(clean_url) <=8:
        prev_c = 0
        next_c = len(clean_url)
    else:
        prev_c = 0
        next_c = urls_len
    for i in range(4):
        sending_chunck2 = clean_url[prev_c:next_c]
        t = threading.Thread(target=parse_it, args=(sending_chunck2,))
        t.start() 




def write_func(data_to_write):
    #lock.acquire()
    filename = 'exceel_output.xls'
    wb = xlwt.Workbook()
    ws = wb.add_sheet('output')
    prev_count = 0
    for d in data_to_write:
        l = d.strip()
        if l == "" or len(l) < 1:
            continue
        else:
            ws.write(prev_count, 0, l)
            prev_count += 1

    wb.save(filename)
    #lock.release()


def main_loop(file):
    clean_url = []
    with open(file, 'r') as f:
        urls = [l.split(';') for l in f ]
    for l in urls:
        for url in l:
            n_url = url.strip()
            if n_url == "" or len(n_url) < 1:
                continue
            else:
                clean_url.append(n_url)
    urls_len = len(clean_url) // 8
    l_n = 1
    if urls_len % 2 != 0:
        urls_len += 1
    if len(clean_url) <=8:
        prev_c = 0
        next_c = len(clean_url)
        l_n = 1
    else:
        prev_c = 0
        next_c = urls_len
        l_n = 8

    for i in range(l_n):
        sending_chunck = ([(clean_url[prev_c:next_c])])
        with ProcessPoolExecutor(2) as ex:
            res = ex.map(parse_func, sending_chunck)
        prev_c = next_c + 1
        next_c += urls_len
    return list(res)
     #q_data.put  q_data.get()




if __name__ == '__main__':
    f_path = '' + os.path.dirname(os.path.realpath(__file__)) + '/urls.txt'
    main_loop(f_path)

    #main_loop()

您的帖子似乎主要是代码;请添加更多详细信息。 看起来您的帖子大部分是代码;请添加更多详细信息。 看起来您的帖子大部分是代码;请添加更多详细信息。

0 个答案:

没有答案