使用 Selenium 抓取完整的数据表

时间:2019-11-11 12:08:36

标签: selenium-webdriver python-3.6

我想从 “King of the Hill”(山丘之王)积分表中抓取数据,并原样返回表中的准确内容。但我的代码返回的数据不正确。我需要获取如下内容(以第 1 行为例)。我只想使用 Selenium,而不是 JSON。

1. 洛杉矶勒布朗(los angeles lebrons) 2 0 0

1.000

2

# Asker's original attempt, kept exactly as posted: plain statements mixed
# with an interactive (>>>) session, with the echoed counts (10 and 7) on
# their own lines after each print call.
from selenium import webdriver
 driver=webdriver.Chrome(executable_path="D:\Programs\Programs\chromedriver\chromedriver.exe")
 driver.get("https://fantasy.espn.com/basketball/league/standings?leagueId=1878319")
rows=len(driver.find_elements_by_xpath('//*[@id="espn-analytics"]/div/div[5]/div[2]/div[1]/div/div/div[3]/div/section/table/tbody/tr/td/div/div/div[2]/table/tbody/tr/td/div/table/tbody/tr'))
>>> print(rows)
10
>>> cols=len(driver.find_elements_by_xpath('//*[@id="espn-analytics"]/div/div[5]/div[2]/div[1]/div/div/div[3]/div/section/table/tbody/tr/td/div/div/div[2]/table/tbody/tr/td/div/table/thead/tr/th'))
>>> print(cols)
7
>>> for r in range(2,rows+1):
    for c i range(1,cols+1):
# NOTE(review): the whole XPath below is a single single-quoted Python
# literal, so "+str(r)+" and "+str(c)+" are sent to the browser as literal
# text instead of being replaced by the row/column numbers. This presumably
# explains why every lookup returns the same cell ("1") in the reported output.
value=driver.find_element_by_xpath('//*[@id="espn-analytics"]/div/div[5]/div[2]/div[1]/div/div/div[3]/div/section/table/tbody/tr/td/div/div/div[2]/table/tbody/tr/td/div/table/tbody/tr["+str(r)+"]/td["+str(c)+"]').text
        print(value,end='')
    print()

我得到的是: 1111111 1111111 1111111 1111111 1111111 1111111 1111111 1111111 1111111

1 个答案:

答案 0 :(得分:1)

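for 循环的写法不正确:问题代码中的 XPath 整体写在一对单引号内,其中的 "+str(r)+" 和 "+str(c)+" 只是字符串字面量的一部分,并没有把行号、列号拼接进去;XPath 会把非空字符串谓词视为恒真,因此每次取到的都是第一行的第一个单元格,输出全是 1。您可以按照以下示例进行操作,它会正确打印所有行。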

from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait

# Scrape the ESPN fantasy-basketball standings table and print every cell of
# every body row, one value per line, preceded by a "Printing Row value#" header.
#
# Uses the Selenium 3 API (executable_path, find_elements_by_xpath) to stay
# consistent with the rest of this page.

# Raw string: "\P" and "\c" are not valid escape sequences, so the non-raw
# form only works by accident and triggers an invalid-escape warning.
CHROMEDRIVER_PATH = r"D:\Programs\Programs\chromedriver\chromedriver.exe"
STANDINGS_URL = "https://fantasy.espn.com/basketball/league/standings?leagueId=1878319"

# Header cells of the standings table; only used to report the column count.
HEADER_CELLS_XPATH = '//*[@id="espn-analytics"]/div/div[5]/div[2]/div[1]/div/div/div[3]/div/section/table/tbody/tr/td/div/div/div[2]/table/tbody/tr/td/div/table/thead/tr/th'
# Body rows of the first element whose class is exactly 'Table2__tbody'.
BODY_ROWS_XPATH = "(//*[@class='Table2__tbody'])[1]/tr"

driver = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH)
try:
    driver.get(STANDINGS_URL)
    # wait until my page table is loaded
    WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.CLASS_NAME, 'Table2__tbody')))

    # Fetch the rows once and iterate over exactly those elements, so the
    # reported row count and the loop can never disagree.
    table_rows = driver.find_elements_by_xpath(BODY_ROWS_XPATH)
    print(len(table_rows))
    cols = len(driver.find_elements_by_xpath(HEADER_CELLS_XPATH))
    print(cols)

    for row_number, table_row in enumerate(table_rows, start=1):
        print("Printing Row value#: " + str(row_number))
        # "./td" is relative to the current row: only its direct cells.
        for cell in table_row.find_elements_by_xpath("./td"):
            print(cell.text)
finally:
    # Always shut the browser down, even if the wait times out or a lookup
    # fails; otherwise Chrome and chromedriver processes are left running.
    driver.quit()

输出:

Printing Row value#: 1
1
los angeles lebrons
3
0
0
1.000

-