代码如下:
import pandas as pd
from bs4 import BeautifulSoup
from time import sleep
from random import randint
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
# Scrape the shot-by-shot text for each player and write one CSV per player.
#
# NOTE(review): the chromedriver path is passed positionally here; newer
# Selenium releases expect a Service object instead -- confirm against the
# installed Selenium version.
driver = webdriver.Chrome("chromedriver 3")
player_ids = ['114']
rd_id = 1

base_url = "https://bet365.apps.imgarena.com/golf/3.18.0/full/?eventId=183&language=en&options=eyJ2aWRlb1BsYXliYWNrRW5hYmxlZCI6ZmFsc2V9#/leaderboard/team/"
mid_url = "?roundNo="
end_url = "&holeNo=1"
# XPath of the control clicked right after the player page loads.
shots_xpath = '//*[@id="root"]/div[1]/div[1]/div[1]/div[1]/div[3]/div[3]/span[1]'
# XPath of the per-hole button; the hole number goes between the two parts.
base_xpath = '//*[@id="root"]/div[1]/div[1]/div[1]/div[1]/div[2]/div[3]/div[1]/div[2]/div[1]/div['
end_xpath = ']/span[1]'

try:
    # iterating through player_ids
    for team_id in player_ids:
        team_url = base_url + str(team_id) + mid_url + str(rd_id) + end_url
        driver.get(team_url)
        shotsbtn = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, shots_xpath)))
        shotsbtn.click()

        results = []
        # Only holes 1 and 2 are visited; use range(1, 19) for all 18 holes.
        # The loop variable is distinct from the player loop's so the hole
        # number no longer overwrites the player id.
        for hole_id in range(1, 3):
            full_xpath = base_xpath + str(hole_id) + end_xpath
            # Random pauses before and after the click, as in the original.
            sleep(randint(2, 5))
            holebtn = WebDriverWait(driver, 5).until(
                EC.presence_of_element_located((By.XPATH, full_xpath)))
            holebtn.click()
            sleep(randint(2, 5))

            soup = BeautifulSoup(driver.page_source, "html.parser")
            # pulling required shot data from each hole
            for a in soup.find_all('div', {'class': 'text__42f569fe no-name__42f569fe'}):
                # Store the element's visible text. The previous `a.find`
                # (without a call) stored the bound method itself, whose
                # string form leaked raw markup into the CSV.
                results.append(a.get_text(' ', strip=True))

        # exporting information by player to .csv
        df = pd.DataFrame({'Player': results})
        df.to_csv('player_' + str(team_id) + '.csv', index=False, encoding='utf-8')
finally:
    # Always end the browser session, even when a wait times out.
    driver.quit()
我想做的是提取“球员姓名”和/或“team_id”以及每个“hole_id”,以便每一次击球(shot)都能被唯一标识。
目前,我把结果导出为 .csv 文件:
目前的输出看起来像这样:
Shot 4: </span <span in the hole for Par</span</div
Shot 3: </span <span stroke 3, 41 yds to Green, 9 ft 6 in. left to pin</span</div
Shot 2: </span <span stroke 2, 135 yds to Rough, 43 yds left to pin</span</div
Shot 1: </span <span stroke 1, 281 yds to Rough, 177 yds left to pin</span</div
理想情况下,我希望它看起来像这样:
Hole 1, 114, 'Hatton, Tyrrell', Shot 4: </span <span in the hole for Par</span</div
Hole 1, 114, 'Hatton, Tyrrell', Shot 3: </span <span stroke 3, 41 yds to Green, 9 ft 6 in. left to pin</span</div
Hole 1, 114, 'Hatton, Tyrrell', Shot 2: </span <span stroke 2, 135 yds to Rough, 43 yds left to pin</span</div
Hole 1, 114, 'Hatton, Tyrrell', Shot 1: </span <span stroke 1, 281 yds to Rough, 177 yds left to pin</span</div
答案 0 :(得分:1)
经过数小时的努力,我得出了最终的代码。在这里:
import pandas as pd
from bs4 import BeautifulSoup
from time import sleep
from random import randint
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
# Scrape every shot of the selected holes for each player and export all of
# them to a single CSV, one row per shot, tagged with hole number, player id
# and player name.

# Column headers of the exported CSV, in output order.
COLUMNS = ['Hole Number', 'Id', 'Player name', 'Shot number', 'Shot details']

BASE_URL = "https://bet365.apps.imgarena.com/golf/3.18.0/full/?eventId=183&language=en&options=eyJ2aWRlb1BsYXliYWNrRW5hYmxlZCI6ZmFsc2V9#/leaderboard/team/"
# XPath of the control clicked right after the player page loads.
SHOTS_XPATH = '//*[@id="root"]/div[1]/div[1]/div[1]/div[1]/div[3]/div[3]/span[1]'
# XPath template of the per-hole button; {} receives the hole number.
HOLE_XPATH = '//*[@id="root"]/div[1]/div[1]/div[1]/div[1]/div[2]/div[3]/div[1]/div[2]/div[1]/div[{}]/span[1]'

player_ids = ['114', '115']
rd_id = 1

# One dict per CSV row. Keeping each row together replaces the five parallel
# lists that previously had to be kept the same length by hand.
rows = []

driver = webdriver.Chrome()
try:
    # iterating through player_ids
    for team_id in player_ids:
        team_url = BASE_URL + str(team_id) + "?roundNo=" + str(rd_id) + "&holeNo=1"
        driver.get(team_url)
        shotsbtn = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, SHOTS_XPATH)))
        shotsbtn.click()

        # Only holes 1 and 2 are visited; use range(1, 19) for all 18 holes.
        # The loop variable is distinct from the player loop's so the hole
        # number no longer overwrites the player id.
        for hole_id in range(1, 3):
            # Random pauses before and after the click, as in the original.
            sleep(randint(2, 5))
            holebtn = WebDriverWait(driver, 5).until(
                EC.presence_of_element_located((By.XPATH, HOLE_XPATH.format(hole_id))))
            holebtn.click()
            sleep(randint(2, 5))

            soup = BeautifulSoup(driver.page_source, "html.parser")
            name = soup.find('div', class_="player-name__691c96bc").text
            # The hole number is read from the currently active hole button.
            hole_num = int(soup.find('span', class_="inner__-3c5fde4a active__-3c5fde4a").text)
            shot_items = soup.find('ul', class_="list__3ed248d4").find_all('li')

            for shot in shot_items:
                # Split on the first colon only, so a colon inside the
                # description does not truncate the shot details.
                shot_num, _, details = shot.text.partition(':')
                rows.append({
                    'Hole Number': hole_num,
                    'Id': team_id,
                    'Player name': name,
                    'Shot number': shot_num,
                    'Shot details': details.strip(),
                })

            # Blank separator row after each hole's shots.
            rows.append(dict.fromkeys(COLUMNS, ''))
finally:
    # quit() ends the whole browser session (close() only closes the
    # window), and the finally clause runs even when a wait times out.
    driver.quit()

df = pd.DataFrame(rows, columns=COLUMNS)
df.to_csv('Shots.csv', index=False)
输出:
该代码几乎是不言自明的,因此我认为没有太多要解释的内容。希望这会有所帮助!