继续深入研究从网站上抓取数据。试图从本网站seatgeeks中提取数据以获得一些列。我无法专门访问定价和链接数据。以下代码运行良好,但我无法获得有关定价和链接的准确数据。即使每个按钮的数字不同,65 $也会不断重复。有任何想法吗?感谢帮助!
#ticket scruber
library(rvest)
tix_link = paste("https://seatgeek.com/new-york-knicks-tickets#events")
tix_info = tix_link %>% read_html() %>%
html_nodes(".event-listing-title span")
link_date = read_html(tix_link)
link_date = html_nodes(link_date, ".event-listing-date")
link_time = read_html(tix_link)
link_time = html_nodes(link_time, ".event-listing-time")
link_price = read_html(tix_link)
link_price = html_node(link_price, ".event-listing-button")
link_info = read_html(tix_link)
link_info = html_node(link_info, "span")
#convert to data frame
ticket_deals = data.frame(deals = html_text(tix_info),
date = html_text(link_date),
time = html_text(link_time),
price = html_text(link_price),
correpsonding_link = html_attr(link_info,"href"))
head(ticket_deals)
deals date
1 Dallas Mavericks at New York Knicks \n Nov 14
2 Detroit Pistons at New York Knicks \n Nov 16
3 Atlanta Hawks at New York Knicks \n Nov 20
4 Portland Trail Blazers at New York Knicks \n Nov 22
5 Charlotte Hornets at New York Knicks \n Nov 25
6 Oklahoma City Thunder at New York Knicks \n Nov 28
time price
1 \n Mon 7:30 PM \n From $65
2 \n Wed 7:30 PM \n From $65
3 \n Sun 12:00 PM \n From $65
4 \n Tue 7:30 PM \n From $65
5 \n Fri 7:30 PM \n From $65
6 \n Mon 7:30 PM \n From $65
correpsonding_link
1 <NA>
2 <NA>
3 <NA>
4 <NA>
5 <NA>