我是用 Python 操作网站的新手。我想检查某个网站上是否有更新的数据,如果有,就把它下载下来。判断数据是否为新数据并不难;问题出在填写完下拉菜单之后尝试下载数据的那一步。该网站通过手风琴(accordion)表格中带超链接的图片来触发下载。我尝试过的所有方法都因为各种错误而无法启动下载。
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.support.ui import Select
# from selenium.webdriver.common.keys import Keys

# NOTE(review): find_element_by_* is the pre-Selenium-4 locator API; confirm
# the installed Selenium version still provides it.

# Open the advanced-search page in a Chrome session.
driver = webdriver.Chrome()
driver.get('http://msc.fema.gov/portal/advanceSearch#searchresultsanchor')

# Fill in the drop-down boxes one after another. The fixed sleeps are
# presumably there to give each dependent list time to populate.
select = Select(driver.find_element_by_id('selstate'))
select.select_by_index(18)
time.sleep(5)
select1 = Select(driver.find_element_by_id('selcounty'))
select1.select_by_index(1)
time.sleep(5)
select2 = Select(driver.find_element_by_id('selcommunity'))
select2.select_by_index(1)
time.sleep(5)

# Submit the search and capture the resulting page.
driver.find_element_by_css_selector('.btn.btn-primary').click()
time.sleep(5)
content = driver.page_source
soup = BeautifulSoup(content, "lxml")

# Marker string looked up in the page to decide whether the data was updated.
cdate = "NFHL_19_20170621"
# Alternative lookup that was tried and left disabled:
# elem = driver.find_element_by_xpath("""//*[@id="nfhl_state_list"]/table/tbody/tr[1]/td/table[2]/tbody/tr/td[2]/table/tbody/tr[4]/td[1]""")
# print(elem.text)

# Search the web page to see whether the data has been updated.
# print(...) with a single argument behaves the same on Python 2 and 3.
if cdate in str(soup):
    print('found')
else:
    print('not found')

# Download state-wide data: read the href of the link in the first row,
# fifth column of the state list. The selector must be one unbroken string.
link = driver.find_element_by_css_selector(
    '#nfhl_state_list > tr:nth-child(1) > td:nth-child(5) > a'
).get_attribute('href')
strlink = str(link)
答案 0 :(得分:0)
试试这个:我只是先把下载(DL)图片展开显示出来,然后点击它。另外顺便提一下,建议改用显式等待(explicit waits): http://selenium-python.readthedocs.io/waits.html#explicit-waits
import time
from selenium.webdriver.support.select import Select as WebDriverSelect
# NOTE(review): WebDriver, options and config are not defined in this
# snippet; presumably they come from the author's own environment -- confirm
# before running.
driver = WebDriver(
    desired_capabilities=options.secure_options.to_capabilities(),
    command_executor=config.command_executor,
)
driver.get('http://msc.fema.gov/portal/advanceSearch#searchresultsanchor')

# Locate and wrap the three drop-downs up front, in page order.
state, county, community = [
    WebDriverSelect(driver.find_element_by_id(dropdown_id))
    for dropdown_id in ('selstate', 'selcounty', 'selcommunity')
]

# Choose state and county by their visible labels, pause briefly, then take
# the entry at index 1 of the community list.
state.select_by_visible_text('INDIANA')
county.select_by_visible_text('ADAMS COUNTY')
time.sleep(2)
community.select_by_index(1)

# Click the search button and then the two "root" elements, pausing two
# seconds after each click. Presumably the latter two expand the sections
# that contain the download image -- verify against the page.
for clickable_id in ('mainSearch', 'eff_root', 'eff_nfhl_state_root'):
    driver.find_element_by_id(clickable_id).click()
    time.sleep(2)

# Click the first image inside the state list body to start the download.
driver.find_element_by_css_selector('tbody[id="nfhl_state_list"] img').click()