我正在尝试从 StubHub 上抓取一场比赛的门票价格。我没能成功连接到 API,所以打算改用这种方法,但遇到了一些问题……
https://www.stubhub.com/chicago-cubs-chicago-tickets-5-3-2021/event/104736306/
为什么这个 XPath 返回的是空值?
# Read the price text of one specific listing.
#
# Selenium's WebElement.text only returns text that is rendered/visible, so
# waiting merely for *presence* in the DOM (and for just 0.01 s) can hand back
# a node that has not been displayed yet and therefore reads as "".
# Waiting for visibility with a realistic timeout avoids that.
# NOTE(review): the absolute XPath with a fixed li[34] index is brittle and
# breaks as soon as the listing order or page layout changes -- consider a
# class-based selector instead.
price_locator = (
    By.XPATH,
    '//*[@id="main"]/div/div/div[1]/div[2]/section[1]/div[1]/ul/div/li[34]/div[2]/div[1]/div[1]/div',
)
price = WebDriverWait(driver, 10).until(
    EC.visibility_of_element_located(price_locator)
).text
更大的代码片段:
# Load the search results page and collect the title and URL of every event.
driver.get('https://www.stubhub.com/find/s/?q=chicago%20cubs')

# Locate the title links once and derive both lists from the same elements,
# so that matchups[i] and games[i] always describe the same event (two
# separate lookups could observe different states of the page).
event_links = WebDriverWait(driver, 1).until(
    EC.presence_of_all_elements_located((By.CSS_SELECTOR, '.EventItem__TitleLink'))
)
matchups = [link.text for link in event_links]
games = [link.get_attribute('href') for link in event_links]

# One row per event: its displayed title and the link to its ticket page.
game_details = pd.DataFrame({'Matchup': matchups, 'Link': games})
print(game_details)
# Gather ticket data for every game found on the search page.
# The four lists below are parallel: index i across all of them describes a
# single ticket listing, so they must always grow by the same amount.
urls = []
location = []
prices = []
details = []


def _element_texts(css_selector, timeout=100):
    """Return the text of every element matching css_selector on the current page.

    Waits up to ``timeout`` seconds for at least one matching element to be
    present; the wait raises if none appears in time.
    """
    elements = WebDriverWait(driver, timeout).until(
        EC.presence_of_all_elements_located((By.CSS_SELECTOR, css_selector))
    )
    return [element.text for element in elements]


for g in games:
    try:
        driver.get(g + '?priceWithFees=true')
        # The last two price elements are dropped, as in the original script.
        # NOTE(review): presumably they are not per-listing prices -- confirm.
        price = _element_texts('.AdvisoryPriceDisplay__content')[:-2]
        loc = _element_texts('.RoyalTicketListPanel__SectionName')
        detail = _element_texts('.RoyalTicketListPanel__SecondaryInfo')
    except Exception as exc:
        # Best effort: report the failing game together with the reason and
        # move on to the next one instead of aborting the whole scrape.
        print('Failed: ' + str(g) + ' (' + repr(exc) + ')')
        continue

    # Keep the parallel lists the same length; otherwise building a
    # DataFrame from them later fails with a length mismatch.
    count = min(len(price), len(loc), len(detail))
    if not (len(price) == len(loc) == len(detail)):
        print(
            'Warning: ' + str(g) + ' returned ' + str(len(price)) + ' prices, '
            + str(len(loc)) + ' locations and ' + str(len(detail))
            + ' details; keeping the first ' + str(count)
        )

    urls.extend([str(g)] * count)
    prices.extend(price[:count])
    location.extend(loc[:count])
    details.extend(detail[:count])
    # len() returns an int, so it must be converted before concatenation.
    print(str(g) + ": " + str(count) + " ")
# Assemble the scraped listings into a table, one column per collected list,
# then show its dimensions followed by its contents.
ticket_prices = pd.DataFrame()
for column_name, column_values in (
    ('Price', prices),
    ('Location', location),
    ('Detail', details),
    ('Link', urls),
):
    ticket_prices[column_name] = column_values
print(ticket_prices.shape)
print(ticket_prices)
# Write both tables to one workbook (path given by `final`), one sheet each.
# ExcelWriter is used as a context manager so the file is saved and closed
# even if a write fails; the explicit writer.save() call is deprecated and
# no longer exists in pandas 2.0+.
with pd.ExcelWriter(final, engine='xlsxwriter') as writer:
    game_details.to_excel(writer, sheet_name='Games')
    ticket_prices.to_excel(writer, sheet_name='Tickets')