我正在使用 pitchRx 库从 MLB Gameday 数据中抓取数据,主要是想获取击球落点的坐标。这个抓取脚本对 2016 年及之前的数据有效,但对 2017 年及之后的数据无效。
下面是这个脚本,它适用于 2008 年至 2016 年的数据。
library(dplyr)
library(dbplyr)
library(pitchRx)
library(RSQLite)
library(XML2R)
#devtools::install_github("cpsievert/pitchrx", force = TRUE) #latest PitchRx Version
# Gameday XML files to pull for each game.
files <- c(
  "inning/inning_hit.xml",
  "players.xml",
  "miniscoreboard.xml",
  "inning/inning_all.xml"
)

# Create the SQLite database (":memory:") that is handed to scrape().
my_db <- DBI::dbConnect(RSQLite::SQLite(), dbname = ":memory:")

# Scrape MLB Gameday for the date range, passing my_db as the connection.
# NOTE(review): this is the call reported to fail with "Could not resolve
# host" for 2017+ dates; nothing in this script controls the host, so the
# cause has to be looked for inside pitchRx -- confirm.
scrape(
  start = "2016-04-05",
  end = "2016-06-20",
  suffix = files,
  connect = my_db
)
# Lazy reference to the hit-coordinate columns of the "hip" table.
# The later merges join on `batter` and `pitcher`, so those columns keep
# their original names here. The base-R `names(locations)[...] <-` renames
# from the earlier version are dropped: `locations` is a lazy database table
# rather than a data frame, and renaming the keys would break those joins.
locations <- select(
  tbl(my_db, "hip"),
  des, x, y, batter, pitcher, type, team, inning, gameday_link
)

# Strip the "gid_" prefix from hip.gameday_link (presumably so it lines up
# with the gameday_link of the "game" table used in the final merge).
# UPDATE returns no result set, so it is issued with dbExecute() instead of
# dbGetQuery(), and the trim characters are a single-quoted SQL string
# literal rather than a double-quoted identifier.
# NOTE(review): SQLite's trim(X, Y) removes any of the characters in Y from
# both ends of X, not the literal prefix "gid_" -- confirm this is safe for
# every id.
DBI::dbExecute(
  my_db,
  "UPDATE hip SET gameday_link = trim(gameday_link, 'gid_')"
)
# Build the batter and pitcher lookup tables from the "player" table,
# keeping the first row seen for each player id, plus a lazy reference to
# the per-game columns of the "game" table.
batters <- tbl(my_db, "player") %>%
  select(first, last, id, bats, team_abbrev, position, avg, hr, rbi) %>%
  as.data.frame()
batters <- batters[!duplicated(batters$id), ]

pitchers <- tbl(my_db, "player") %>%
  select(first, last, id, team_abbrev, rl) %>%
  as.data.frame()
pitchers <- pitchers[!duplicated(pitchers$id), ]

stadium <- tbl(my_db, "game") %>%
  select(original_date, home_team_name, gameday_link)
# Join every hit with its batter, its pitcher and its game. The intermediate
# results get their own names so that base::merge() is not masked by a data
# frame that is itself called `merge`.
hits_batters <- merge(locations, batters, by.x = "batter", by.y = "id")
hits_players <- merge(hits_batters, pitchers, by.x = "pitcher", by.y = "id")
merge3 <- merge(hits_players, stadium, by = "gameday_link")

# Rename header. The names are assigned purely by position, so stop early if
# the merged table does not have exactly as many columns as there are names
# instead of silently mislabelling them.
header <- c(
  "Gameday_Link", "Pitcher_ID", "Batter_ID", "Out_Come", "Hit_x", "Hit_y",
  "Type", "Team", "Inning", "Batter_First_Name", "Batter_Last_Name", "Bats",
  "Batter_Team_Abr", "position", "avg", "hr", "rbi", "Pitcher_First_Name",
  "Pitcher_Last_Name", "Pitcher_Team", "Pitcher_RL", "Game_Date", "Home_Team"
)
stopifnot(ncol(merge3) == length(header))
colnames(merge3) <- header

# Choose columns: everything except positions 1, 7 and 8
# (Gameday_Link, Type and Team).
cols <- c(2:6, 9:23)
merge3 <- merge3[, cols]

# Write to csv
write.csv(merge3, file = "baseballData_test.csv")
以下是我对 2018 年的数据运行该脚本时收到的错误。
Error in function (type, msg, asError = TRUE) :
Could not resolve host:
>
> #Create locations data frame and fill with hit coordinates
> locations <- select(tbl(my_db, "hip"), des, x, y, batter, pitcher, type, team, inning, gameday_link)
Error in result_create(conn@ptr, statement) : no such table: hip
>
>
> #Rename location ids
> names(locations)[names(locations) == 'batter'] <- 'batter.id'
Error in names(locations)[names(locations) == "batter"] <- "batter.id" :
object 'locations' not found
> names(locations)[names(locations) == 'pitcher'] <- 'pitcher.id'
Error in names(locations)[names(locations) == "pitcher"] <- "pitcher.id" :
object 'locations' not found
>
> #Remove gid from
> dbGetQuery(my_db, 'UPDATE hip SET gameday_link = trim(gameday_link, "gid_")')
Error in result_create(conn@ptr, statement) : no such table: hip
>
>
> #create batters, pitchers and stadium dataframe
> batters <- select(tbl(my_db, "player"), first, last, id, bats, team_abbrev, position, avg, hr, rbi)
Error in result_create(conn@ptr, statement) : no such table: player
> batters <- as.data.frame(batters)
Error in as.data.frame(batters) : object 'batters' not found
> batters <- as.data.frame(batters)
Error in as.data.frame(batters) : object 'batters' not found
> batters <- batters[!duplicated(batters$id),]
Error: object 'batters' not found
>
> pitchers <- select(tbl(my_db, "player"), first, last, id, team_abbrev, rl)
Error in result_create(conn@ptr, statement) : no such table: player
> pitchers <- as.data.frame(pitchers)
Error in as.data.frame(pitchers) : object 'pitchers' not found
> pitchers <- as.data.frame(pitchers)
Error in as.data.frame(pitchers) : object 'pitchers' not found
> pitchers <- pitchers[!duplicated(pitchers$id),]
Error: object 'pitchers' not found
>
> stadium <- select(tbl(my_db, "game"), original_date, home_team_name, gameday_link)
Error in result_create(conn@ptr, statement) : no such table: game
>
> #merge dataframes together
> merge <- merge(locations, batters, by.x="batter", by.y="id")
Error in merge(locations, batters, by.x = "batter", by.y = "id") :
object 'locations' not found
>
> merge2 <- merge(merge, pitchers, by.x="pitcher", by.y="id")
我读到过这个问题与数据路径的更改有关,解决办法是从 GitHub 安装最新版本的库,但这并没有解决我的问题。如何才能让该脚本适用于当前的数据?