我有一个多线程应用程序,它使用 python-requests 库和 BeautifulSoup 发送 HTTP 请求。
虽然 python-requests 的主页声明该库是线程安全的,但我怀疑段错误出在 python-requests 中。
我没有听说过 BeautifulSoup 不是线程安全的,但也不能排除这种可能性。
核心转储(coredump)见此处(约 800 KiB)。
为保险起见,另一份核心转储备份也见此处(约 800 KiB)。
这是相关Python代码的摘要:
import threading
import time
from functools import partial
from threading import Thread

import bs4
import requests
def get_response(url, phrase='', timeout=30):
    """Fetch *url* through the fixed proxy and walk its ``<p>`` elements.

    Args:
        url: Address handed unchanged to ``requests.get``.
        phrase: Accepted from the caller; not referenced by the code
            visible in this excerpt.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
            Defaults to 30.

    Returns:
        ``False`` if the request raises or the status code is not 200,
        ``True`` otherwise.
    """
    proxy = '1.2.3.4:3128'
    proxy_config = {'http': proxy, 'https': proxy, 'ftp': proxy}
    try:
        # Without a timeout a stalled peer or proxy would block this worker
        # thread indefinitely.
        response = requests.get(url, proxies=proxy_config, timeout=timeout)
    except Exception:
        # Best-effort worker: any failure to fetch is reported as False.
        return False
    if response.status_code != 200:
        return False
    # NOTE(review): no parser is named, so bs4 chooses one itself -- confirm
    # which parser is in use when chasing the reported segfault.
    soup = bs4.BeautifulSoup(response.text)
    for element in soup.select('p'):
        # Check some things
        # NOTE(review): `things_checked` and `things` are not defined in this
        # excerpt; presumably the elided checks assign them -- confirm.
        if things_checked == True:
            write_result(str(things))
    return True
# Serializes writes to the shared output file, which worker threads call into
# concurrently.
_output_lock = threading.Lock()


def write_result(things=''):
    """Write *things* to the module-level ``output_file`` and flush it.

    The write and the flush happen under one lock so that output from
    concurrent callers is not interleaved.

    Args:
        things: Text to write.

    Returns:
        ``True`` when both the write and the flush succeed, ``False`` if
        either raises (including when ``output_file`` is not set or closed).
    """
    global output_file
    try:
        with _output_lock:
            output_file.write(things)
            output_file.flush()
    except Exception:
        # Best-effort: a failed write must not take the worker thread down.
        return False
    return True
def main(urls, phrases=()):
    """Open the shared output file and start one worker per URL/phrase pair.

    Args:
        urls: Iterable of addresses passed to ``get_response``.
        phrases: Iterable of phrases; each is paired with every URL.
            Defaults to an empty (immutable) sequence, so no threads are
            started when it is omitted.

    Returns:
        ``False`` if ``output.txt`` cannot be opened, ``True`` otherwise.
    """
    global output_file
    try:
        # Line-buffered append; the handle is published as a module global
        # for write_result to use.
        output_file = open('output.txt', 'a+', 1)
    except (IOError, OSError):
        return False
    for url in urls:
        for phrase in phrases:
            t = Thread(target=get_response, args=(url, phrase))
            # Daemon threads do not keep the interpreter alive at exit.
            t.daemon = True
            t.start()
    time.sleep(10)  # Actually, other non-related code runs here.
    # The file is intentionally left open: the workers are never joined, so
    # some may still be writing when this function returns.
    return True
if __name__ == '__main__':
    # Sample inputs for a direct run; main() pairs every URL with every phrase.
    phrases = ['john', 'paul', 'george', 'ringo']
    urls = ['1.1.1.1', '2.2.2.2']
    main(urls=urls, phrases=phrases)