我想用 Selenium 单击新页面数据表右上方的“以CSV格式下载文件”链接;或者,如何复制整个表格并将其粘贴到文件中?如果有其他更好的下载或复制粘贴方式也可以。问题在于我的代码仅适用于旧页面的HTML元素,而不适用于新页面的元素。所以我也想知道,如何让同样的 Selenium 代码适用于新页面的新HTML元素。
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
##chrome_options = webdriver.ChromeOptions()
##chrome_options.add_argument("--disable-infobars")
##driver = webdriver.Chrome(chrome_options=options)
# Start a Chrome session and load the NSE equities page.
browser = webdriver.Chrome()
browser.get('https://www.nseindia.com/products/content/equities/equities/eq_security.htm')

# Type the equity symbol into the field named "symbol".
browser.find_element_by_name('symbol').send_keys('YESBANK')

# Click the "rdDateToDate" input, then fill in the two date fields.
browser.find_element_by_css_selector("input#rdDateToDate").click()
browser.find_element_by_id('fromDate').send_keys('01-03-2019')
browser.find_element_by_id('toDate').send_keys('01-05-2019')

# Click the element with id "get" (presumably the submit button).
# NOTE: click() returns None, so get_data is always None.
submit_button = browser.find_element_by_xpath('//*[@id="get"]')
get_data = submit_button.click()

# Locate the table by absolute XPath and send Ctrl+C to it.
# NOTE: send_keys() returns None as well, so get_table is always None.
results_table = browser.find_element_by_xpath('/html/body/div[2]/div[3]/div[2]/div[1]/div[3]/div/div[3]/table')
get_table = results_table.send_keys(Keys.CONTROL, 'c')
答案 0 :(得分:2)
尝试一下:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Start a Chrome session and load the NSE equities page.
browser = webdriver.Chrome()
browser.get('https://www.nseindia.com/products/content/equities/equities/eq_security.htm')

# Locators use find_element(By.<strategy>, value): the find_element_by_*
# helpers are deprecated and no longer exist in Selenium 4.3+.

# Insert the YESBANK symbol string.
equity_name = browser.find_element(By.NAME, 'symbol')
equity_name.send_keys('YESBANK')

# Select the radio button.
v = browser.find_element(By.CSS_SELECTOR, "input#rdDateToDate")
v.click()

# Insert the "from" date.
date_from = browser.find_element(By.ID, 'fromDate')
date_from.send_keys('01-03-2019')

# Insert the "to" date.
date_to = browser.find_element(By.ID, 'toDate')
date_to.send_keys('01-05-2019')

# Submit the form. click() returns None, so its result is not stored.
browser.find_element(By.XPATH, '//*[@id="get"]').click()

# Wait (up to 10 seconds) until the table data and the CSV link have loaded.
WebDriverWait(browser, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "download-data-link")))

# The CSV link tag has no class name or id of its own, so find its parent
# tag by class name first and then descend to the child <a> tag.
parentElement = browser.find_element(By.CLASS_NAME, 'download-data-link')

# Find the CSV file link inside the parent tag.
link = parentElement.find_element(By.TAG_NAME, "a")

# Download the CSV file. click() returns None, so there is nothing to print.
link.click()
其中 "download-data-link" 是 span 标签的 class,而 a 标签是位于该 span 标签内部的 CSV 文件链接。
抓取表格数据:
在脚本顶部添加:
from bs4 import BeautifulSoup
在 WebDriverWait(browser, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "download-data-link")))
这一行代码之后添加:
# Parse the HTML currently rendered in the Selenium-driven browser.
soup = BeautifulSoup(browser.page_source, 'lxml')

# Locate the div with class "tabular-data-historic", then the <tbody> of the
# table inside it. The parsed document is bound to `soup`.
div = soup.find("div", {'class': "tabular-data-historic"})
table = div.find("table").find("tbody")

# Print the text of every cell, one per line, walking the table row by row.
for tr in table.find_all("tr"):
    for td in tr.find_all("td"):
        print(td.text)