我正在编写一个链接爬取程序，但程序要花好几分钟才能判断出某个链接无效，这大大降低了程序的效率。我也无法通过 ping 来预先检测，因为它仅接受 TCP 通信。我的程序是用 Python 编写的，不过如果有必要，换用另一种更适合此任务的语言也没有问题。
# Route both plain and TLS requests through the same local SOCKS5 endpoint.
# With the "socks5h" scheme, hostname resolution is delegated to the proxy.
proxies = dict.fromkeys(('http', 'https'), 'socks5h://127.0.0.1:9050')

# Module-wide flag that get_raw() sets to True when a request fails.
link_error = False
class find_links_on_page:
    """Fetch the page at the global ``search_link`` and collect its links."""

    def get_raw(self, timeout=30):
        """Download ``search_link`` via ``proxies`` and return its ``<a>`` tags.

        Args:
            timeout: Seconds to wait before giving up on the request. It is
                passed straight to ``requests.get``, so it may be a single
                number or a ``(connect, read)`` tuple. Without a timeout a
                dead host can block the call for minutes.

        Returns:
            The result of ``soup.find_all('a')`` on success, or ``None`` when
            the request failed or timed out.

        Side effects:
            Updates the module-level ``link_error`` flag on every call, and
            on success rebinds the module-level ``soup`` and ``link_raw``.
        """
        global link_error, soup, link_raw

        print(search_link)
        print('working1a')

        # Reset the flag each call; otherwise one failed link would make
        # every later call take the failure branch.
        link_error = False
        try:
            raw = requests.get(search_link, proxies=proxies, timeout=timeout)
        except (requests.exceptions.ConnectionError,
                requests.exceptions.Timeout):
            # Timeout is listed explicitly because a read timeout is not a
            # ConnectionError subclass.
            link_error = True
        print('working1b')

        if link_error:
            print('not working')
            return None

        soup = BeautifulSoup(raw.content, 'lxml')
        print('working1c')
        link_raw = soup.find_all('a')
        print(link_raw)
        return link_raw