我正在尝试从这个页面(即下方代码中的 URL)抓取游泳、骑车、跑步和总时间的数据。
使用 SelectorGadget(选择器小工具)定位游泳时间时,结果为 `character(0)`(空的字符向量)。
library(rvest)
library(xml2)
# Address of the mobile results page being scraped.
url <- "http://m.ironman.com/triathlon/events/americas/ironman-70.3/pucon/results.aspx"

# Download and parse the page once.
html <- read_html(url)

# Text of every <td> that is the sixth child of its parent row; this is the
# selector that was picked for the swim time.
swim_time <- html_text(html_nodes(html, "td:nth-child(6)"))
感谢您的帮助。
答案 0 :(得分:0)
library(rvest)
library(dplyr)
# Address of the mobile results page being scraped.
url <- "http://m.ironman.com/triathlon/events/americas/ironman-70.3/pucon/results.aspx"

# Parse the page, convert the element with id "eventResults" to a data frame
# (html_table() returns a list, so keep its first entry), and retain only the
# name and timing columns.
# The same element could be selected with
#   xpath = "//*[@id=\"eventResults\"]"
# instead of the CSS selector.
df <- url %>%
  read_html() %>%
  html_nodes(css = "#eventResults") %>%
  html_table() %>%
  .[[1]] %>%
  select(Name, Swim, Bike, Run, Finish)
答案 1 :(得分:0)
# Packages this snippet relies on: rvest for read_html()/html_nodes()/
# html_text(), stringr for str_trim()/str_remove_all(), tidyr for separate(),
# dplyr for mutate(), and lubridate for hms().
library(rvest)
library(dplyr)
library(stringr)
library(tidyr)
library(lubridate)

url <- "http://m.ironman.com/triathlon/events/americas/ironman-70.3/pucon/results.aspx"
html <- read_html(url)

# Read the text of the HTML comments in the page, re-parse that text as HTML,
# and keep one cleaned-up string per <tr> found in it.
# NOTE(review): presumably the results table is commented out in the page
# source, which would explain why a plain CSS selector finds nothing - confirm.
df <- html %>%
  html_nodes("div") %>%
  html_nodes(xpath = "//comment()") %>%
  html_text() %>% # extract comment text
  paste(collapse = "") %>% # collapse to a single string
  read_html() %>%
  html_nodes("tr") %>%
  html_text() %>%
  str_trim() %>%
  str_remove_all(" ") %>%
  # Keep the row text as character regardless of the R version's default.
  as.data.frame(stringsAsFactors = FALSE)
names(df) <- "All"

# Split every row string on the "\r\n" line breaks into one column per field,
# split the leading piece on the comma, strip digits from `first`, and parse
# the three split times.
# NOTE(review): "first" is produced by both separate() calls; check which
# value ends up in the column and whether the remaining columns stay aligned.
df <- df %>%
  separate(
    All,
    c("last", "first", "swim", "bike", "run", "div", "gender", "overall"),
    sep = "\r\n"
  ) %>%
  separate(last, c("last", "first"), sep = ",") %>%
  mutate(first = gsub("[0-9]", "", first)) %>%
  mutate(swim = hms(swim), bike = hms(bike), run = hms(run)) %>%
  # roll = TRUE asks hms() to roll overflowing seconds/minutes into the next
  # unit. NOTE(review): hms() is given a Period sum here rather than a
  # string - verify the total is parsed as intended.
  mutate(total = hms(swim + bike + run, roll = TRUE))