我有一段 Django 代码,其中使用了独立运行的 Selenium Chrome 驱动程序(ChromeDriver)。
我正在尝试从某个 URL 的页面中获取表格(table)的内容,但始终找不到该表格。我已经分别尝试了 find_element_by_xpath,以及配合 EC.visibility_of_element_located 使用的 WebDriverWait,这两种方法都找不到该表格。
from selenium import webdriver
def setup_driver(self):
    """Create a headless Chrome WebDriver and store it on ``self.driver``.

    The driver is started with a fixed set of Chrome command-line flags and
    a 15-second implicit wait that applies to every element lookup.
    """
    logging.info("Setup driver...")
    chrome_options = webdriver.ChromeOptions()
    chrome_flags = (
        "--no-sandbox",
        "--disable-extensions",
        "--incognito",
        "--disable-plugins-discovery",
        "--user-data-dir=/tmp",
        "--profile-directory=Default",
        "--headless",
    )
    for flag in chrome_flags:
        chrome_options.add_argument(flag)
    # NOTE(review): headless Chrome may start with a small default viewport,
    # which can keep elements from ever becoming "visible"; consider adding a
    # "--window-size=..." flag if visibility waits time out - confirm.

    # ``options=`` replaces the deprecated ``chrome_options=`` keyword, which
    # newer Selenium releases no longer accept.
    self.driver = webdriver.Chrome(options=chrome_options)
    self.driver.implicitly_wait(15)
----这是另一个py文件----
import datetime as dt
import io

import pandas as pd
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
def __init__(self, remote=False, rental=False):
    """Assemble the site configuration and hand it to the parent initializer.

    Args:
        remote: Forwarded unchanged to the parent ``__init__``.
        rental: Forwarded unchanged to the parent ``__init__``.
    """
    # Credentials below are placeholder values as written in this file.
    login_info = {
        "username": "your_username",
        "passwd": "your_passwd",
        "username_input_xpath": "//input[@id = 'PblcID']",
        "passwd_input_xpath": "//input[@id = 'passwordTextBox']",
    }
    # Search page URLs and the XPath of the status table on those pages.
    search_info = {
        "residential_url": "https://matrix.fmlsd.mlsmatrix.com/Matrix/Search/Residential",
        "rental_url": "https://matrix.fmlsd.mlsmatrix.com/Matrix/Search/ResidentialIncome",
        "status_table_xpath": "//table[@class='S_MultiStatus']",
    }
    # Element ids / XPaths used on the results page.
    table_info = {
        "page_size_id": "m_ucDisplayPicker_m_ddlPageSize",
        "page_format_id": "m_ucDisplayPicker_m_ddlDisplayFormats",
        "next_button_xpath": "//span[@class='pagingLinks']/a[2]",
        "table_xpath": "//*[@id='m_pnlDisplay']/table",
    }
    info = {
        "url": "https://login.fmls.com/SAML/login.aspx?ReturnUrl=%2fsaml%2fSAML%2fSSOService.aspx%3fSAMLRequest%3djZLNTsMwEIRfJfKBWxLHrWhrmqCKCilSEagBDlyQ62waS7EdvE7VxydJy98BxMXyrnY934y8RKGblq86X5stvHWAPsjXKXlNaLUDyapwBxULp7tJFQo6KcOFhHkypwDsckaCZ3CorEkJiygJcsQOcoNeGN%252B3aLII6TRk88ck4RPK2Sxi08ULCda9ijLCj5u19y3yOG7sXpmo0g1G0up44IqL1d0mLor7AtxBSYgEtkcS3FonYSROSSUahEH5QSCqA3x2VojgBoUba7DT4M5v5KaEY0ro7xNP280XlRbeqeOIVUb9ca4HwtP1zD2QXbc9g6%252Bd7fZ1yi6Ebq%252BMdVAqB9KnCQmOujHIx8RT0jnDrUCF3AgNyL3kg13eJ8lbZ72VtiHZcpjmY7Du2%252F7f6%252BLDGcn%252B7WMZf1PKTtXPf5G9Aw%253D%253D%26RelayState%3dMatrix%2bSAML%2bLogin",
        "login_info": login_info,
        "search_info": search_info,
        "table_info": table_info,
    }
    vars_range = range(7)
    super().__init__(info, vars_range, remote, rental)
def login_mls(self):
    """Open the login page and submit the configured username and password.

    Reads the login URL, the credentials and the input XPaths from
    ``self.info`` and drives ``self.driver`` to fill in and submit the form.
    """
    self.driver.get(self.info['url'])
    login_info = self.info["login_info"]

    # ``find_element(By.XPATH, ...)`` is used instead of the
    # ``find_element_by_xpath`` helper, which newer Selenium releases removed.
    # NOTE: the XPaths below start with ``//`` and therefore match against the
    # whole document, not only inside ``form``.
    form = self.driver.find_element(By.XPATH, "//form")
    user = form.find_element(By.XPATH, login_info["username_input_xpath"])
    password = form.find_element(By.XPATH, login_info["passwd_input_xpath"])

    user.send_keys(login_info["username"])
    password.send_keys(login_info["passwd"])

    button = form.find_element(By.XPATH, "//input[@id = 'loginButton']")
    button.click()
# NOTE(review): presumably sample inputs for the search step - confirm.
date_string = "2019-05-09"
rental = False
def search(self, date_string: str):
    """Open the search page, fill in the status dates and submit the search.

    The first ten ``<input>`` elements under the status table are treated as
    consecutive (checkbox, text field) pairs. For each pair the date is typed
    into the text field and the checkbox is clicked; ENTER is sent to the
    last text field to submit.

    Args:
        date_string: Text typed into every status date field.
    """
    search_info = self.info["search_info"]
    url_key = "rental_url" if self.rental else "residential_url"
    self.driver.get(search_info[url_key])

    # ``find_elements(By.XPATH, ...)`` replaces ``find_elements_by_xpath``,
    # which newer Selenium releases removed.
    inputs = self.driver.find_elements(
        By.XPATH, search_info["status_table_xpath"] + "//input"
    )
    # Both search variants use the same first ten inputs.
    inputs = inputs[:10]

    self.driver.execute_script("window.scrollTo(0, 90);")

    # Pair inputs as (checkbox, field). zip() drops a trailing unpaired input
    # instead of raising IndexError, so ENTER is still sent on the last
    # complete pair. With no inputs found, nothing is typed or submitted.
    pairs = list(zip(inputs[0::2], inputs[1::2]))
    for position, (checkbox, field) in enumerate(pairs, start=1):
        field.send_keys(date_string)
        checkbox.click()
        if position == len(pairs):
            field.send_keys(Keys.ENTER)
def scrape_table(self, date: dt.datetime, page: int, var: int) -> pd.DataFrame:
    """Wait for the results table to become visible and parse it.

    Args:
        date: Only used in the error message.
        page: Only used in the error message.
        var: Only used in the error message.

    Returns:
        The first table that pandas parses out of the element's outer HTML.

    Raises:
        MLSException: If the table does not become visible within 15 seconds
            or its HTML cannot be parsed; the underlying error is chained as
            ``__cause__``.
    """
    # NOTE(review): if the wait below still times out, check whether the
    # table lives inside an <iframe> (needs driver.switch_to.frame) and
    # whether the preceding search was actually submitted - confirm against
    # the live page.
    try:
        table_xpath = self.info["table_info"]["table_xpath"]
        # A single explicit wait; a direct find_element call beforehand would
        # fail before this wait gets a chance to run.
        table = WebDriverWait(self.driver, 15).until(
            EC.visibility_of_element_located((By.XPATH, table_xpath))
        )
        # The wait returns one WebElement (not a list), so it must not be
        # indexed.
        html = table.get_attribute("outerHTML")
        # Wrap the markup in StringIO: passing literal HTML strings to
        # read_html is deprecated in recent pandas.
        return pd.read_html(io.StringIO(html))[0]
    except Exception as err:
        # Keep the single exception type callers see, but preserve the cause
        # and let KeyboardInterrupt / SystemExit propagate.
        raise MLSException(
            "scrape_table {} p{} v{} error.".format(date, page, var)
        ) from err
在此代码的最后,我本应得到一个 pandas DataFrame,但实际上得到的是错误消息:TimeoutException(message, screen, stacktrace)。在最后一个函数(scrape_table)中给 table 变量赋值时,抛出了 selenium.common.exceptions.TimeoutException。