我正在使用 Selenium 抓取该网站: https://www.fedsdatacenter.com/federal-pay-rates/index.php?y=all&n=&l=&a=&o=
我的代码通过持续单击“下一步”并解析表格来正常工作,直到出现如下警告消息:
DataTables 警告:表格 id=table-example - 无效的 JSON 响应。
由于此错误,我的代码停止了。即使手动操作,单击“下一步”也会出现同样的警告。
这是我的代码。我该怎么办?如果有什么方法可以改善我的代码,也请帮助我。
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import ElementNotVisibleException
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import csv
import time
def has_class_onclick(tag):
    """Predicate: does *tag* declare an ``onclick`` attribute?"""
    onclick_present = tag.has_attr('onclick')
    return onclick_present
def extract_table_content_into_rows(website_lists):
    """Extract all table rows from a list of HTML documents.

    Parameters
    ----------
    website_lists : iterable of str
        Raw HTML page sources to parse.

    Returns
    -------
    list of list of str
        One inner list per ``<tr>``; empty cells are recorded as ``"."``.
    """
    list_of_row = []
    for table_page in website_lists:
        soup_page = BeautifulSoup(table_page, "html.parser")
        soup_table_raw = soup_page.find("table")
        if not soup_table_raw:
            continue
        soup_table = soup_table_raw.find("tbody")
        if not soup_table:
            # Bug fix: a <table> with no <tbody> previously made soup_table
            # None and the .find_all below raised AttributeError.
            continue
        for soup_row in soup_table.find_all("tr"):
            row_content = []
            for soup_column in soup_row.find_all("td"):
                # get_text collapses all descendants to their text; the old
                # contents[0].strip() crashed when a cell's first child was
                # a nested tag rather than a string.
                column_content = soup_column.get_text(strip=True)
                row_content.append(column_content if column_content else ".")
            list_of_row.append(row_content)
    return list_of_row
def csv_writer(lists_of_row):
    """Append the given rows to ``federal.csv``.

    Parameters
    ----------
    lists_of_row : iterable of sequence
        Rows to write, one CSV record each.
    """
    # Append-text mode so rows from successive pages accumulate in one file;
    # newline="" lets the csv module control line endings (csv docs requirement).
    with open("federal.csv", "at", newline="") as csvfile:
        # Bug fix: the writer used to be re-created once per row inside the
        # loop; build it once and let writerows emit everything.
        writer = csv.writer(csvfile)
        writer.writerows(lists_of_row)
def _scrape_current_page(driver):
    # Parse the table on the currently rendered page and append it to federal.csv.
    row_list = extract_table_content_into_rows([driver.page_source])
    print(row_list)
    csv_writer(row_list)


def main():
    """Scrape the federal pay-rates DataTable page by page into federal.csv.

    Walks the pagination by clicking "Next". The first five pages use a
    fixed sleep for the reload; later pages poll the pagination widget
    until the next page number appears.
    """
    # Bug fix: find_element_by_xpath was removed in Selenium 4; use the
    # supported driver.find_element(By.XPATH, ...) form throughout.
    driver = webdriver.Chrome('chromedriver')  # falls back to PATH lookup if omitted
    driver.get('https://www.fedsdatacenter.com/federal-pay-rates/index.php?y=all&n=&l=&a=&o=')
    # Open the page-length dropdown and pick the 4th option (largest page size).
    driver.find_element(By.XPATH, '//*[@id="table-example_length"]/label/select').click()
    time.sleep(3)
    driver.find_element(By.XPATH, '//*[@id="table-example_length"]/label/select/option[4]').click()
    time.sleep(3)

    page_num = 1
    # First five pages: scrape, click Next, wait a fixed delay for the reload.
    while 1 <= page_num <= 5:
        _scrape_current_page(driver)
        driver.find_element(By.XPATH, '//*[@id="table-example_next"]/a').click()
        time.sleep(3)
        print(page_num)
        page_num += 1

    # Remaining pages: scrape, click Next, then poll the pagination widget
    # until the next page number is shown. StaleElementReferenceException
    # means DataTables re-rendered the widget mid-poll, so simply retry.
    while page_num > 5:
        _scrape_current_page(driver)
        driver.find_element(By.XPATH, '//*[@id="table-example_next"]/a').click()
        not_find = 1
        while not_find == 1:
            try:
                driver.find_element(By.XPATH, '//*[@id="table-example_paginate"]/ul/li[6]/a')
                while driver.find_element(By.XPATH, '//*[@id="table-example_pagina'
                                                    'te"]/ul/li[6]/a').text != str(page_num + 2):
                    time.sleep(0.1)
                not_find = 0
            except StaleElementReferenceException:
                continue
        print(page_num)
        page_num += 1


if __name__ == "__main__":
    main()
答案 0(得分:0)
一种方法是使用某些JavaScript禁用页面上的所有警报:
driver.execute_script('window.alert = function() {};')