我一直在使用 Selenium 对多个表的数据进行网页抓取,将它们转换为数据框(DataFrame),最后保存为 csv 文件。代码最初运行良好,但突然在第4个表('api_retired')处停止并抛出了错误:“WebDriverException: Message: disconnected: Unable to receive message from renderer (Session info: chrome=83.0.4103.116)”
我已经确认 Chrome 和 chromedriver 都是最新版本,并且 chromedriver 已放在本机的 /usr/local/bin 路径下。我还尝试重新下载 chromedriver 并自行指定路径 '/Users/username/Desktop/chromedriver',但这返回了一个错误,提示 chromedriver 不存在。因此,我改用 “driver = webdriver.Chrome(ChromeDriverManager().install())” 来解决。这样可以打开页面,并在引发错误之前抓取了前100页。
请问如何解决此错误?我已经折腾了一个星期,快要失去耐心和希望了。
以下是完整的代码和错误:
# Scrape every page of the Markit "apiRetirement" registry table with Selenium
# and write the collected rows to API_retired2.csv.
#
# The scrape is wrapped in try/finally so that, if the browser session dies
# part-way through (e.g. "disconnected: Unable to receive message from
# renderer"), the rows collected so far are still written to disk and the
# browser/chromedriver processes are shut down. This does not by itself
# prevent the disconnect; it only keeps a long run from being lost.
from selenium.common.exceptions import (
    NoSuchElementException,
    StaleElementReferenceException,
)

# Failures that concern a single element only. Anything else (notably a lost
# browser session) is allowed to propagate instead of being recorded as NaN.
_MISSING_ELEMENT_ERRORS = (NoSuchElementException, StaleElementReferenceException)


def _cell_text(row, xpath):
    """Return the text of the element at *xpath* under *row*, or NaN if unavailable."""
    try:
        return row.find_element_by_xpath(xpath).text
    except _MISSING_ELEMENT_ERRORS:
        return np.nan


def _row_link(row):
    """Return the href of the first <a> under *row*, or NaN if unavailable."""
    try:
        return row.find_element_by_tag_name('a').get_attribute('href')
    except _MISSING_ELEMENT_ERRORS:
        return np.nan


page = 1
max_page = 3865

retired_date2 = []
vintage2 = []
project2 = []
account2 = []
benef_owner2 = []
standard2 = []
pj_type2 = []
retirement_qt2 = []
measurement2 = []
m_type2 = []
details2 = []

# Position in this list + 1 is the <td> index the column is read from.
cell_columns = [
    retired_date2,
    vintage2,
    project2,
    account2,
    benef_owner2,
    standard2,
    pj_type2,
    retirement_qt2,
    measurement2,
    m_type2,
]

driver = webdriver.Chrome(ChromeDriverManager().install())
try:
    driver.get('https://mer.markit.com/br-reg/public/index.jsp?entity=apiRetirement&sort=account_name&dir=ASC&start=0&entity_domain=Markit')

    while page <= max_page:
        api_retired2 = WebDriverWait(driver, 20).until(
            EC.visibility_of_all_elements_located((By.TAG_NAME, "tr"))
        )

        for row in api_retired2:
            # Read the whole row before appending anything, so that an
            # exception in the middle of a row cannot leave the column lists
            # with different lengths.
            values = [
                _cell_text(row, './td[{}]'.format(td_index))
                for td_index in range(1, len(cell_columns) + 1)
            ]
            link = _row_link(row)

            for column, value in zip(cell_columns, values):
                column.append(value)
            details2.append(link)

        # There is no further page to go to after the last one, so only
        # click "next" while pages remain.
        if page < max_page:
            WebDriverWait(driver, 20).until(
                EC.element_to_be_clickable((By.XPATH, '//*[@id="next_pager"]/a'))
            ).click()
            print('navigate to page: ' + str(page + 1))
        page = page + 1
finally:
    try:
        # Runs on success and on failure alike, so partial results survive.
        api_retired_df2 = pd.DataFrame({
            "retired_date": retired_date2,
            "vintage": vintage2,
            "project": project2,
            "account": account2,
            # Previously scraped but never written to the output.
            "benef_owner": benef_owner2,
            "standard": standard2,
            "pj_type": pj_type2,
            "retirement_qt": retirement_qt2,
            "measurement": measurement2,
            "m_type": m_type2,
            "details": details2,
        })
        print(api_retired_df2.head())
        api_retired_df2.to_csv('API_retired2.csv', index=False)
    finally:
        # quit() ends the WebDriver session and the chromedriver process;
        # close() would only close the current window.
        driver.quit()
WebDriverException Traceback (most recent call last)
<ipython-input-10-88ae5efb5e27> in <module>
83
84
---> 85 WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="next_pager"]/a'))).click()
86 page=page+1
87 print('navigate to page: ' + str(page))
~/opt/anaconda3/lib/python3.7/site-
packages/selenium/webdriver/remote/webelement.py in click(self)
78 def click(self):
79 """Clicks the element."""
---> 80 self._execute(Command.CLICK_ELEMENT)
81
82 def submit(self):
~/opt/anaconda3/lib/python3.7/site-
packages/selenium/webdriver/remote/webelement.py in _execute(self,
command, params)
631 params = {}
632 params['id'] = self._id
--> 633 return self._parent.execute(command, params)
634
635 def find_element(self, by=By.ID, value=None):
~/opt/anaconda3/lib/python3.7/site-
packages/selenium/webdriver/remote/webdriver.py in execute(self,
driver_command, params)
319 response =
self.command_executor.execute(driver_command, params)
320 if response:
--> 321 self.error_handler.check_response(response)
322 response['value'] = self._unwrap_value(
323 response.get('value', None))
~/opt/anaconda3/lib/python3.7/site-
packages/selenium/webdriver/remote/errorhandler.py in check_response(self, response)
240 alert_text = value['alert'].get('text')
241 raise exception_class(message, screen, stacktrace, alert_text)
--> 242 raise exception_class(message, screen, stacktrace)
243
244 def _value_or_default(self, obj, key, default):
WebDriverException: Message: disconnected: Unable to receive message
from renderer
(Session info: chrome=83.0.4103.116)
非常非常感谢您的帮助!