作为一名Python新手,我试图从This Site
中抓取一些数据。主要目标是使用pandas将每个选项的数据提取到excel文件。
作为第一步,我尝试从下拉列表中获取所有选项,并使用了以下代码。(Python 3.6.0)
import sys
import signal
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import NoSuchElementException, StaleElementReferenceException
def sigint(signal, frame):
    """Signal handler that terminates the program with exit status 0.

    Both parameters are ignored.  Inside this function the ``signal``
    parameter shadows the imported ``signal`` module; the name is kept so
    the signature stays unchanged.

    Raises:
        SystemExit: always, with code 0 (via ``sys.exit``).
    """
    sys.exit(0)
def make_waitfor_elem_updated_predicate(driver, waitfor_elem_id):
    """Build a predicate that reports when a captured element has gone stale.

    The element with id ``waitfor_elem_id`` is looked up immediately, so the
    reference is taken *before* the caller triggers whatever is expected to
    replace that element.

    Args:
        driver: object exposing ``find_element(by, value)``.
        waitfor_elem_id: id of the element to capture.

    Returns:
        A one-argument callable (the argument is ignored) that returns
        ``True`` once reading ``.text`` on the captured element raises
        ``StaleElementReferenceException`` and ``False`` otherwise.
    """
    # 'id' is the locator strategy string; the positional (by, value) form
    # avoids the find_element_by_id helper.
    elem = driver.find_element('id', waitfor_elem_id)

    def elem_updated(driver):
        try:
            # The value is irrelevant; only whether the access raises matters.
            elem.text
        except StaleElementReferenceException:
            return True
        except Exception:
            # Best effort: any other ordinary error counts as "not updated
            # yet".  SystemExit / KeyboardInterrupt are deliberately NOT
            # caught so an interrupt during polling still stops the program.
            return False
        return False

    return elem_updated
class Scraper(object):
    """Browser-driven walker over two drop-downs of the page at ``url``.

    ``scrape`` iterates the options of the "organisation" drop-down and, for
    each one, the options of the "unit" drop-down, printing the visible text
    of every option it selects.

    NOTE(review): ``get_select`` wraps the located element in Selenium's
    ``Select`` helper, which only accepts ``<select>`` elements; confirm the
    hard-coded ids really resolve to ``<select>`` tags on the target page.
    """

    # Seconds passed to every WebDriverWait created by this class.
    WAIT_TIMEOUT = 10
    # Options whose visible text equals this label are skipped.
    SKIPPED_OPTION_TEXT = 'TÜMÜ'

    def __init__(self):
        """Start a Chrome session and maximise its window."""
        self.url = 'https://seffaflik.epias.com.tr/transparency/uretim/planlama/kgup.xhtml'
        self.driver = webdriver.Chrome()
        self.driver.maximize_window()

    def close(self):
        """Quit the underlying driver, ending the browser session."""
        self.driver.quit()

    def get_select(self, id):
        """Return a fresh ``Select`` wrapper for the element with this id."""
        # 'id' is the locator strategy string; the positional (by, value)
        # form avoids the find_element_by_id helper.
        return Select(self.driver.find_element('id', id))

    def select_option(self, id, value, waitfor_elem_id=None):
        """Select the option with ``value`` and optionally wait for a refresh.

        Args:
            id: id of the drop-down element.
            value: ``value`` attribute of the option to select.
            waitfor_elem_id: when truthy, id of an element that must go stale
                (within ``WAIT_TIMEOUT`` seconds) after the selection.

        Returns:
            A ``Select`` wrapper obtained after the selection (and wait).
        """
        predicate = None
        if waitfor_elem_id:
            # Must be built BEFORE selecting: it captures the current element
            # so that its later replacement can be detected.
            predicate = make_waitfor_elem_updated_predicate(
                self.driver,
                waitfor_elem_id,
            )

        self.get_select(id).select_by_value(value)

        if predicate is not None:
            WebDriverWait(self.driver, self.WAIT_TIMEOUT).until(predicate)

        # Look the element up again instead of reusing the earlier wrapper.
        return self.get_select(id)

    def make_select_option_iterator(self, id, waitfor_elem_id):
        """Return a zero-argument factory for a generator over the options.

        Each call of the returned factory reads the drop-down's current
        option values (skipping ``SKIPPED_OPTION_TEXT``), then selects them
        one by one via ``select_option`` and yields the visible text of the
        selected option.
        """
        def next_option():
            select = self.get_select(id)
            # Snapshot the values up front; the wrappers themselves are
            # re-fetched on every selection.
            option_values = [
                str(option.get_attribute('value'))
                for option in select.options
                if option.text != self.SKIPPED_OPTION_TEXT
            ]
            for option_value in option_values:
                select = self.select_option(id, option_value, waitfor_elem_id)
                yield select.first_selected_option.text

        return next_option

    def load_page(self):
        """Open ``url`` and wait until the organisation drop-down is found."""
        self.driver.get(self.url)
        marker_id = 'j_idt102:distributionId_input'

        def page_loaded(driver):
            return driver.find_element('id', marker_id)

        WebDriverWait(self.driver, self.WAIT_TIMEOUT).until(page_loaded)

    def scrape(self):
        """Load the page and print every organisation with its units."""
        organisations = self.make_select_option_iterator(
            'j_idt102:distributionId_input',
            'j_idt102:uevcb_input',
        )
        units = self.make_select_option_iterator(
            'j_idt102:uevcb_input',
            'j_idt102:uevcb_input',
        )

        self.load_page()

        for organisation in organisations():
            print(organisation)
            for unit in units():
                print(2 * ' ', unit)
if __name__ == '__main__':
    # Route Ctrl-C through sigint(), which exits via sys.exit(0).
    signal.signal(signal.SIGINT, sigint)
    scraper = Scraper()
    try:
        scraper.scrape()
    finally:
        # Always end the browser session, including when scrape() fails or
        # the SIGINT handler raises SystemExit part-way through.
        scraper.driver.quit()
我从select元素中获取了id,但程序报错:
selenium.common.exceptions.UnexpectedTagNameException: Message: Select only works on <select> elements, not on <div>
对此有何想法?
感谢。
答案 0 :(得分:0)
这是因为您尝试将 `Select` 类应用于 `<div>` 元素,而 `Select` 只能与 `<select>` 元素一起使用!
尝试通过单击下拉按钮然后单击所需选项来处理下拉列表,例如:
# Drive the drop-down with plain clicks instead of wrapping it in Select.
# NOTE(review): the '_label' and '_1' ids are taken from the answer as-is;
# verify them (and which entry '_1' addresses) against the page's DOM.
driver.find_element_by_id('j_idt102:distributionId_label').click() # click the label element to open the drop-down
driver.find_element_by_id('j_idt102:distributionId_1').click() # click the entry to select it