This is what I have tried so far.
from urllib.request import urlopen
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import NoSuchElementException

url = 'http://marketdata.krx.co.kr/mdi#document=080120&547c5e15ef32e37dc099b89d69ac8970-[object%20HTMLDivElement]=1&547c5e15ef32e37dc099b89d69ac8970-[object%20HTMLDivElement]=2&547c5e15ef32e37dc099b89d69ac8970-[object%20HTMLDivElement]=1&547c5e15ef32e37dc099b89d69ac8970-object%20HTMLDivElement]=1'

driver = webdriver.Chrome()
driver.get(url)

element = driver.find_element_by_xpath('//select[@name="upclss"]')
all_options = element.find_elements_by_tag_name("option")
for option in all_options:
    if option.text == "원자재":
        option.click()
driver.implicitly_wait(5)

another = driver.find_element_by_xpath('//li[@class="active"]')
another.click()
driver.implicitly_wait(5)

html = driver.page_source
soup = BeautifulSoup(html, "html.parser")
table = soup.findChildren('table')[0]
rows = table.findChildren('tr')
for row in rows:
    cells = row.findChildren('td')
    for cell in cells:
        cell_content = cell.getText()
        print(cell_content)
What should I do to get the contents of the table at the URL above and print them out? Thank you very much!!
Answer 0 (score: 0)
Why not take it from the page source? I know you are using Python, but in Java I would solve it this way: treat the page source as a String and take the substring that starts with <table> and ends with </table> (or whatever range you want). From that I would extract the values I want in the same way: build a substring that starts with a <td> tag and ends with a </td> tag. The remaining text is the table data you see on the page.
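A minimal Python sketch of that substring idea, assuming the rendered page source is already in a string html (as in the question's code) and that the table of interest is the first <table> in it; this is fragile compared to a real parser such as BeautifulSoup:

start = html.find('<table')
end = html.find('</table>', start) + len('</table>')
table_html = html[start:end]

pos = 0
while True:
    td_start = table_html.find('<td', pos)
    if td_start == -1:
        break
    td_start = table_html.find('>', td_start) + 1   # skip past the opening <td ...> tag
    td_end = table_html.find('</td>', td_start)
    print(table_html[td_start:td_end].strip())      # raw text between <td> and </td>
    pos = td_end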
Answer 1 (score: 0)
Seeing the output (value) of html = driver.page_source would help, but I think this will also work:
from urllib.request import urlopen
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import NoSuchElementException

url = 'http://marketdata.krx.co.kr/mdi#document=080120&547c5e15ef32e37dc099b89d69ac8970-[object%20HTMLDivElement]=1&547c5e15ef32e37dc099b89d69ac8970-[object%20HTMLDivElement]=2&547c5e15ef32e37dc099b89d69ac8970-[object%20HTMLDivElement]=1&547c5e15ef32e37dc099b89d69ac8970-object%20HTMLDivElement]=1'

driver = webdriver.Chrome()
driver.get(url)

element = driver.find_element_by_xpath('//select[@name="upclss"]')
all_options = element.find_elements_by_tag_name("option")
for option in all_options:
    if option.text == "원자재":
        option.click()
driver.implicitly_wait(5)

another = driver.find_element_by_xpath('//li[@class="active"]')
another.click()
driver.implicitly_wait(5)

# find_elements_by_xpath (plural) returns a list of cells that can be iterated over
tds = driver.find_elements_by_xpath("//table/tr/td")
for td in tds:
    print(td.text)
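If the rendered table contains a tbody element (browsers usually insert one), the XPath //table/tr/td may match nothing; a slightly looser variant, using the By class that is already imported, would be along these lines:

tds = driver.find_elements(By.XPATH, "//table//td")   # '//' skips over any <tbody>
for td in tds:
    print(td.text)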
Answer 2 (score: 0)
In the end I solved it with Selenium itself, not with BeautifulSoup...
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
import time

url = '...'
driver = webdriver.Chrome()
driver.get(url)

element = driver.find_element_by_xpath('//select[@name="upclss"]')
all_options = element.find_elements_by_tag_name("option")
for option in all_options:
    print(option.text)
    option.click()
    driver.implicitly_wait(5)

    another = driver.find_element_by_xpath('//li[@class="active"]')
    another.click()
    time.sleep(5)

    header = driver.find_element_by_xpath('//table[@class="CI-GRID-HEADER-TABLE"]').text
    other = driver.find_element_by_xpath('//table[@class="CI-GRID-BODY-TABLE"]').text
    print(header)
    print(other)
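As a side note, the WebDriverWait and expected_conditions imports above are not actually used; an explicit wait could stand in for the fixed time.sleep(5). A small sketch, assuming the same CI-GRID-BODY-TABLE class used above and an arbitrary 10-second timeout:

# wait up to 10 seconds for the grid body table to be present in the DOM
WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.XPATH, '//table[@class="CI-GRID-BODY-TABLE"]'))
)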