我从网上获得了以下 R 代码并尝试运行,但始终收到错误:无法加载外部实体(英文原文:failed to load external entity)“http://www.baseball-reference.com/teams/ARI/1998-schedule-scores.shtml”。有什么建议可以解决这个问题吗?
library("XML")
#' Download one season's schedule-and-scores table for a team.
#'
#' Builds the baseball-reference.com schedule URL for `team` and `year`,
#' downloads the page, parses every HTML table on it, and returns the selected
#' table with a `year` column appended.
#'
#' @param team Team abbreviation as it appears in the site's URLs, e.g. "ARI".
#' @param year Season, e.g. 1998; stored unchanged in the `year` column.
#' @return The selected table (as produced by `readHTMLTable()`) with an extra
#'   `year` column.
fetch <- function(team, year) {
  page_url <- paste0(
    "https://www.baseball-reference.com/teams/", team, "/", year,
    "-schedule-scores.shtml"
  )

  # Passing the URL straight to readHTMLTable() hands it to libxml2's built-in
  # HTTP client, which cannot speak HTTPS; that is the usual cause of
  # "failed to load external entity". Download the page with a base R
  # connection instead and parse it from text.
  con <- url(page_url)
  # readLines() opens and closes the connection but does not destroy it.
  on.exit(close(con), add = TRUE)
  page_text <- paste(readLines(con, warn = FALSE), collapse = "\n")

  doc <- htmlParse(page_text, asText = TRUE)
  raw <- readHTMLTable(doc, stringsAsFactors = FALSE)

  # Use the 6th table when it has 21 columns, otherwise the 7th.
  # NOTE(review): these positions are hard-coded against the page layout;
  # confirm they still match the site's current markup.
  if (ncol(raw[[6]]) == 21) {
    data <- raw[[6]]
  } else {
    data <- raw[[7]]
  }

  data$year <- year
  data
}
# 1 - Arizona Diamondbacks
# Fetch every season from 1998 through 2014 in order, stack the per-season
# tables into a single data frame, and write it to an .rda file.
# The seasons are converted to double so fetch() receives the same type as the
# numeric literals 1998, 1999, ... would give it.
ARI <- do.call(
  rbind,
  lapply(as.numeric(1998:2014), function(season) fetch("ARI", season))
)
save(ARI, file = "data/raw/ARI.rda")
答案 0 :(得分:0)
我还没有真正使用过 XML 软件包,但用 rvest 可以实现类似的功能:
library(rvest)
#' Read one season's schedule-and-scores table for a team with rvest.
#'
#' @param team Team abbreviation as it appears in the site's URLs, e.g. "ARI".
#' @param year Season, e.g. 1998; stored unchanged in the `year` column.
#' @return The selected table from `html_table()` with an extra `year` column.
fetch <- function(team, year) {
  page_url <- paste0(
    "http://www.baseball-reference.com/teams/", team, "/", year,
    "-schedule-scores.shtml"
  )

  # Parse the page and convert every <table> on it to a data frame.
  tables <- html_table(read_html(page_url))

  # Take the first table when it has 21 columns, otherwise the second.
  schedule <- if (ncol(tables[[1]]) == 21) tables[[1]] else tables[[2]]

  schedule$year <- year
  schedule
}
然后调用不同年份的函数
# Fetch each season from 1998 through 2014 and row-bind the results into one
# data frame.
ARI <- purrr::map_dfr(
  1998:2014,
  function(season) fetch(team = "ARI", year = season)
)