使用Python迭代依赖的下拉列表

时间:2017-02-03 08:37:28

标签: python python-3.x selenium drop-down-menu web-scraping

作为一名Python新手,我试图从This Site

中抓取一些数据

主要目标是使用pandas将每个选项的数据提取到excel文件。

作为第一步,我们尝试从下拉列表中获取所有选项,使用的代码如下。(Python 3.6.0)

 
import sys
import signal
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import NoSuchElementException, StaleElementReferenceException

def sigint(signal, frame):
    """Signal handler that terminates the process with exit status 0.

    Both arguments are supplied by the ``signal`` machinery and are ignored.
    """
    raise SystemExit(0)

def make_waitfor_elem_updated_predicate(driver, waitfor_elem_id):
    """Build a wait predicate that turns true once an element has been replaced.

    The element with id ``waitfor_elem_id`` is looked up immediately, i.e.
    *before* the caller triggers the action that is expected to refresh it.
    The returned callable then probes that captured reference on every poll.

    Args:
        driver: WebDriver used to look up the element.
        waitfor_elem_id: id of the element whose replacement is awaited.

    Returns:
        A callable taking a driver (ignored) that returns True once accessing
        the captured element raises StaleElementReferenceException, and False
        otherwise.

    Raises:
        Whatever ``driver.find_element_by_id`` raises if the element cannot be
        found at construction time.
    """
    elem = driver.find_element_by_id(waitfor_elem_id)

    def elem_updated(driver):
        try:
            # Any property access forces a round trip to the browser; the
            # value itself is irrelevant, only whether the reference is stale.
            elem.text
        except StaleElementReferenceException:
            return True
        except Exception:
            # Best effort: other errors just mean "not updated yet".
            # Deliberately not a bare ``except`` so that SystemExit (raised by
            # the SIGINT handler) and KeyboardInterrupt still propagate.
            return False

        return False

    return elem_updated

class Scraper(object):
    """Iterate two dependent drop-downs on the page at ``self.url``.

    A Chrome WebDriver session is started on construction. Call ``close()``
    (or use the instance as a context manager) to end that session when done.
    """

    # Element ids of the two drop-downs that are iterated.
    ORGANISATION_SELECT_ID = 'j_idt102:distributionId_input'
    UNIT_SELECT_ID = 'j_idt102:uevcb_input'

    # Seconds to wait for the page load / a drop-down refresh.
    WAIT_TIMEOUT = 10

    def __init__(self):
        self.url = 'https://seffaflik.epias.com.tr/transparency/uretim/planlama/kgup.xhtml'
        self.driver = webdriver.Chrome()
        self.driver.maximize_window()

    def close(self):
        """Quit the browser and end the WebDriver session."""
        self.driver.quit()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        # Never suppress an exception raised inside the ``with`` body.
        return False

    def get_select(self, id):
        """Return a ``Select`` wrapper around the element with the given id."""
        return Select(self.driver.find_element_by_id(id))

    def select_option(self, id, value, waitfor_elem_id=None):
        """Select the option with ``value`` in the drop-down ``id``.

        Args:
            id: id of the drop-down element.
            value: ``value`` attribute of the option to select.
            waitfor_elem_id: optional id of an element that must be replaced
                in the DOM before this method returns.

        Returns:
            A freshly looked-up ``Select`` for ``id``.
        """
        predicate = None
        if waitfor_elem_id:
            # Must be built BEFORE selecting: it captures the current element
            # so that its later replacement can be detected.
            predicate = make_waitfor_elem_updated_predicate(
                self.driver,
                waitfor_elem_id
            )

        self.get_select(id).select_by_value(value)

        if predicate is not None:
            WebDriverWait(self.driver, self.WAIT_TIMEOUT).until(predicate)

        # Look the element up again; the previous reference may be stale.
        return self.get_select(id)

    def make_select_option_iterator(self, id, waitfor_elem_id):
        """Return a zero-argument callable producing an option generator.

        Each call of the returned callable snapshots the option values of the
        drop-down ``id`` (skipping the entry whose text is 'TÜMÜ'), then
        selects them one by one and yields the text of the selected option.
        """
        def iterate_options():
            # Snapshot the values up front: option elements may go stale once
            # a selection refreshes the drop-down.
            values = [
                str(option.get_attribute('value'))
                for option in self.get_select(id).options
                if option.text != 'TÜMÜ'
            ]

            for value in values:
                select = self.select_option(id, value, waitfor_elem_id)
                yield select.first_selected_option.text

        return iterate_options

    def load_page(self):
        """Open ``self.url`` and wait until the first drop-down is present."""
        self.driver.get(self.url)

        def page_loaded(driver):
            return driver.find_element_by_id(self.ORGANISATION_SELECT_ID)

        WebDriverWait(self.driver, self.WAIT_TIMEOUT).until(page_loaded)

    def scrape(self):
        """Print every organisation and, indented below it, each of its units."""
        organisations = self.make_select_option_iterator(
            self.ORGANISATION_SELECT_ID,
            self.UNIT_SELECT_ID
        )

        # NOTE(review): the unit iterator waits for the unit drop-down itself
        # to be replaced after each selection; if the page does not re-render
        # it, the wait will time out -- confirm against the page.
        units = self.make_select_option_iterator(
            self.UNIT_SELECT_ID,
            self.UNIT_SELECT_ID
        )

        self.load_page()

        for organisation in organisations():
            print(organisation)
            for unit in units():
                print(2 * ' ', unit)

if __name__ == '__main__':
    # Make Ctrl-C exit with status 0 instead of a KeyboardInterrupt traceback.
    signal.signal(signal.SIGINT, sigint)
    scraper = Scraper()
    try:
        scraper.scrape()
    finally:
        # Always end the WebDriver session -- also on errors and on the
        # SystemExit raised by the SIGINT handler -- so that no browser or
        # driver process is left running.
        scraper.driver.quit()

我们使用的是select元素的id,但得到如下错误信息:

selenium.common.exceptions.UnexpectedTagNameException: Message: Select only works on <select> elements, not on <div>

对此有何想法?

感谢。

1 个答案:

答案 0 :(得分:0)

这是因为您尝试将类Select应用于<div>元素,而您只能将Select与<select>元素一起使用!

尝试通过单击下拉按钮然后单击所需选项来处理下拉列表,例如:

# Open the drop-down by clicking its label element.
dropdown_label = driver.find_element_by_id('j_idt102:distributionId_label')
dropdown_label.click()

# Select an option by clicking its list entry.
dropdown_option = driver.find_element_by_id('j_idt102:distributionId_1')
dropdown_option.click()