Django Selenium WebDriver找不到表

时间:2019-05-10 06:15:48

标签: django selenium google-chrome selenium-chromedriver

我有一段 Django 代码,它使用独立运行的 Selenium Chrome 驱动程序(ChromeDriver)。

我正在尝试从某个 URL 的页面中获取表格(table 元素)的属性,但是找不到该表格。我已经尝试过 find_element_by_xpath,也尝试过配合 EC.visibility_of_element_located 使用 WebDriverWait,这两种方法都找不到该表格。

from selenium import webdriver

def setup_driver(self):
    """Start a headless Chrome WebDriver and store it on ``self.driver``.

    Chrome is launched with a fixed list of command-line switches, and the
    resulting driver is given an implicit wait of 15 (seconds, per the
    Selenium API) for element lookups.
    """
    logging.info("Setup driver...")

    # Switches are added one by one, in exactly this order.
    switches = (
        "--no-sandbox",
        "--disable-extensions",
        "--incognito",
        "--disable-plugins-discovery",
        "--user-data-dir=/tmp",
        "--profile-directory=Default",
        "--headless",
    )
    options = webdriver.ChromeOptions()
    for switch in switches:
        options.add_argument(switch)

    self.driver = webdriver.Chrome(chrome_options=options)
    self.driver.implicitly_wait(15)

----这是另一个py文件----

import datetime
from io import StringIO

import pandas as pd
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
def __init__(self, remote=False, rental=False):
    """Build the site configuration and pass it to the base initializer.

    The configuration holds the login URL, the login form settings, the
    search page URLs and the result-table locators. It is forwarded to
    ``super().__init__`` together with ``range(0, 7)`` and the two flags.

    Args:
        remote: Forwarded unchanged to the base initializer.
        rental: Forwarded unchanged to the base initializer.
    """
    # Placeholder credentials plus the XPaths of the two login inputs.
    login_info = {
        "username": "your_username",
        "passwd": "your_passwd",
        "username_input_xpath": "//input[@id = 'PblcID']",
        "passwd_input_xpath": "//input[@id = 'passwordTextBox']",
    }

    # Search pages for the two listing kinds and the status table locator.
    search_info = {
        "residential_url": "https://matrix.fmlsd.mlsmatrix.com/Matrix/Search/Residential",
        "rental_url": "https://matrix.fmlsd.mlsmatrix.com/Matrix/Search/ResidentialIncome",
        "status_table_xpath": "//table[@class='S_MultiStatus']",
    }

    # Element ids / XPaths used on the results page.
    table_info = {
        "page_size_id": "m_ucDisplayPicker_m_ddlPageSize",
        "page_format_id": "m_ucDisplayPicker_m_ddlDisplayFormats",
        "next_button_xpath": "//span[@class='pagingLinks']/a[2]",
        "table_xpath": "//*[@id='m_pnlDisplay']/table",
    }

    info = {
        "url": "https://login.fmls.com/SAML/login.aspx?ReturnUrl=%2fsaml%2fSAML%2fSSOService.aspx%3fSAMLRequest%3djZLNTsMwEIRfJfKBWxLHrWhrmqCKCilSEagBDlyQ62waS7EdvE7VxydJy98BxMXyrnY934y8RKGblq86X5stvHWAPsjXKXlNaLUDyapwBxULp7tJFQo6KcOFhHkypwDsckaCZ3CorEkJiygJcsQOcoNeGN%252B3aLII6TRk88ck4RPK2Sxi08ULCda9ijLCj5u19y3yOG7sXpmo0g1G0up44IqL1d0mLor7AtxBSYgEtkcS3FonYSROSSUahEH5QSCqA3x2VojgBoUba7DT4M5v5KaEY0ro7xNP280XlRbeqeOIVUb9ca4HwtP1zD2QXbc9g6%252Bd7fZ1yi6Ebq%252BMdVAqB9KnCQmOujHIx8RT0jnDrUCF3AgNyL3kg13eJ8lbZ72VtiHZcpjmY7Du2%252F7f6%252BLDGcn%252B7WMZf1PKTtXPf5G9Aw%253D%253D%26RelayState%3dMatrix%2bSAML%2bLogin",
        "login_info": login_info,
        "search_info": search_info,
        "table_info": table_info,
    }

    # NOTE(review): the meaning of the seven indices is defined by the base
    # class, which is not visible here -- confirm.
    super().__init__(info, range(0, 7), remote, rental)

def login_mls(self):
    """Open the login URL, type the configured credentials and submit.

    Reads the URL, the credentials and the input XPaths from ``self.info``
    and drives the page through ``self.driver``.
    """
    self.driver.get(self.info["url"])
    login_form = self.driver.find_element_by_xpath("//form")

    credentials = self.info["login_info"]
    username_box = login_form.find_element_by_xpath(credentials["username_input_xpath"])
    password_box = login_form.find_element_by_xpath(credentials["passwd_input_xpath"])

    # Both inputs are located before anything is typed into either of them.
    username_box.send_keys(credentials["username"])
    password_box.send_keys(credentials["passwd"])

    login_form.find_element_by_xpath("//input[@id = 'loginButton']").click()

# Sample values: a search date written as YYYY-MM-DD and the rental flag.
date_string, rental = "2019-05-09", False

def search(self, date_string: str):
    """Open the search page, fill in the status rows and submit the form.

    The first ten ``<input>`` elements under the status table are treated as
    consecutive (checkbox, text field) pairs. For every pair the text field
    receives ``date_string`` and the checkbox is clicked; Enter is sent to
    the text field of the last pair to submit the search.

    A trailing input without a partner (odd number of inputs) is ignored.
    The previous index-based loop raised ``IndexError`` on it after part of
    the form had already been filled in, and never submitted.

    Args:
        date_string: Text typed into every status text field.
    """
    search_info = self.info["search_info"]
    url_key = "rental_url" if self.rental else "residential_url"
    self.driver.get(search_info[url_key])

    # Rental and residential searches both use the first ten inputs.
    inputs = self.driver.find_elements_by_xpath(
        search_info["status_table_xpath"] + "//input"
    )[:10]

    # Even positions are checkboxes, odd positions the matching text fields;
    # zip() drops an unpaired trailing element.
    pairs = list(zip(inputs[0::2], inputs[1::2]))

    self.driver.execute_script("window.scrollTo(0, 90);")
    for position, (checkbox, field) in enumerate(pairs, start=1):
        field.send_keys(date_string)
        checkbox.click()
        if position == len(pairs):
            # Enter in the last text field submits the search form.
            field.send_keys(Keys.ENTER)

def scrape_table(self, date: datetime.datetime, page: int, var: int) -> pd.DataFrame:
    """Return the currently displayed results table as a DataFrame.

    Waits up to 15 seconds for the element matching
    ``self.info["table_info"]["table_xpath"]`` to become visible, then
    parses its outer HTML with ``pandas.read_html``.

    Args:
        date: Only used in the error message.
        page: Only used in the error message.
        var: Only used in the error message.

    Returns:
        The first table parsed from the element's HTML.

    Raises:
        MLSException: If the table does not become visible in time or its
            HTML cannot be parsed. The original exception is chained as the
            cause.
    """
    try:
        table_xpath = self.info["table_info"]["table_xpath"]
        # visibility_of_element_located yields one WebElement, not a list,
        # so the result must not be indexed. The explicit wait also replaces
        # the earlier find_element_by_xpath call, whose result was discarded.
        table = WebDriverWait(self.driver, 15).until(
            EC.visibility_of_element_located((By.XPATH, table_xpath))
        )
        html = table.get_attribute("outerHTML")
        # Newer pandas versions deprecate passing literal HTML strings;
        # a file-like object is accepted by every version.
        return pd.read_html(StringIO(html))[0]
    except Exception as err:
        # `except Exception` lets KeyboardInterrupt/SystemExit propagate, and
        # `from err` keeps the real cause (e.g. TimeoutException) visible.
        raise MLSException(
            "scrape_table {} p{} v{} error.".format(date, page, var)
        ) from err

在这段代码的最后,我应该得到一个 pandas DataFrame,但实际得到的是错误信息:TimeoutException(message, screen, stacktrace)。最后一个函数中给 table 变量赋值的地方抛出了 selenium.common.exceptions.TimeoutException。

0 个答案:

没有答案