使用 pitchRx 抓取 MLB Gameday 数据时无法获取当前赛季的数据

时间:2018-12-21 04:49:41

标签: r

我正在使用 pitchRx 库从 MLB Gameday 抓取数据,主要是想获取击球落点坐标。抓取脚本对2016年及更早的数据可以正常运行,但对2017年及之后的数据不起作用。

下面是脚本,它适用于2008年至2016年的数据。

library(dplyr)
library(dbplyr)
library(pitchRx)
library(RSQLite)
library(XML2R)
#devtools::install_github("cpsievert/pitchrx", force = TRUE) #latest PitchRx Version

# Gameday XML files to request for every game
files <- c(
  "inning/inning_hit.xml",
  "players.xml",
  "miniscoreboard.xml",
  "inning/inning_all.xml"
)

# In-memory SQLite database that the scraper writes its tables into
my_db <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")

# Scrape MLB Gameday for the chosen date range into my_db
scrape(
  start = "2016-04-05",
  end = "2016-06-20",
  suffix = files,
  connect = my_db
)

# Remove the leading "gid_" from hip.gameday_link so it can be matched against
# game.gameday_link in the merge on gameday_link later in the script.
# - DBI::dbExecute() is the DBI call for statements that return no rows
#   (dbGetQuery() is meant for SELECT-style queries).
# - substr() drops exactly the four-character prefix, whereas
#   trim(x, "gid_") strips any run of the characters g, i, d and _ from
#   BOTH ends of the value.
# - The WHERE clause makes the statement safe to run more than once.
DBI::dbExecute(
  my_db,
  "UPDATE hip
      SET gameday_link = substr(gameday_link, 5)
    WHERE substr(gameday_link, 1, 4) = 'gid_'"
)

# Hit coordinates and context for every ball in play, read AFTER the update
# above so the collected rows carry the cleaned gameday_link.
# The batter / pitcher columns keep their original names on purpose: the merges
# later in the script key on "batter" and "pitcher". The former names()<- calls
# were applied to a lazy table rather than a data frame, so they are dropped.
locations <- as.data.frame(
  select(
    tbl(my_db, "hip"),
    des, x, y, batter, pitcher, type, team, inning, gameday_link
  )
)


# Batter lookup table: pull the player rows into memory, then keep the first
# row seen for each player id
batters <- tbl(my_db, "player") %>%
  select(first, last, id, bats, team_abbrev, position, avg, hr, rbi) %>%
  as.data.frame()
batters <- batters[!duplicated(batters$id), ]

# Pitcher lookup table, built the same way from the same "player" table
pitchers <- tbl(my_db, "player") %>%
  select(first, last, id, team_abbrev, rl) %>%
  as.data.frame()
pitchers <- pitchers[!duplicated(pitchers$id), ]

# Game-level details (date, home team), left as a lazy query on the "game" table
stadium <- tbl(my_db, "game") %>%
  select(original_date, home_team_name, gameday_link)

# Join hit locations with batter, pitcher and game details.
# The intermediate results get their own names so that base::merge() is not
# shadowed by a data frame called `merge`.
hits_batters <- merge(locations, batters, by.x = "batter", by.y = "id")
hits_players <- merge(hits_batters, pitchers, by.x = "pitcher", by.y = "id")
merge3 <- merge(hits_players, stadium, by = "gameday_link")

# Rename header. The names are assigned by position, so they rely on the
# column order produced by the three merges above: join key first, then the
# remaining left-hand columns, then the remaining right-hand columns.
header <- c(
  "Gameday_Link", "Pitcher_ID", "Batter_ID", "Out_Come", "Hit_x", "Hit_y",
  "Type", "Team", "Inning",
  "Batter_First_Name", "Batter_Last_Name", "Bats", "Batter_Team_Abr",
  "position", "avg", "hr", "rbi",
  "Pitcher_First_Name", "Pitcher_Last_Name", "Pitcher_Team", "Pitcher_RL",
  "Game_Date", "Home_Team"
)

# Fail loudly instead of mislabelling columns if the merged table does not
# have exactly one column per header entry.
stopifnot(ncol(merge3) == length(header))
colnames(merge3) <- header

# Choose columns: drops Gameday_Link (1), Type (7) and Team (8)
cols <- c(2:6, 9:23)
merge3 <- merge3[, cols]

# Write to csv
write.csv(merge3, file = "baseballData_test.csv")

这些是我用该脚本抓取2018年数据时收到的错误。

Error in function (type, msg, asError = TRUE)  : 
  Could not resolve host:  
> 
> #Create locations data frame and fill with hit coordinates
> locations <- select(tbl(my_db, "hip"), des, x, y, batter, pitcher, type, team, inning, gameday_link)
Error in result_create(conn@ptr, statement) : no such table: hip
> 
> 
> #Rename location ids
> names(locations)[names(locations) == 'batter'] <- 'batter.id'
Error in names(locations)[names(locations) == "batter"] <- "batter.id" : 
  object 'locations' not found
> names(locations)[names(locations) == 'pitcher'] <- 'pitcher.id'
Error in names(locations)[names(locations) == "pitcher"] <- "pitcher.id" : 
  object 'locations' not found
> 
> #Remove gid from
> dbGetQuery(my_db, 'UPDATE hip SET gameday_link = trim(gameday_link, "gid_")')
Error in result_create(conn@ptr, statement) : no such table: hip
> 
> 
> #create batters, pitchers and stadium dataframe
> batters <- select(tbl(my_db, "player"), first, last, id, bats, team_abbrev, position, avg, hr, rbi)
Error in result_create(conn@ptr, statement) : no such table: player
> batters <- as.data.frame(batters)
Error in as.data.frame(batters) : object 'batters' not found
> batters <- as.data.frame(batters)
Error in as.data.frame(batters) : object 'batters' not found
> batters <- batters[!duplicated(batters$id),]
Error: object 'batters' not found
> 
> pitchers <- select(tbl(my_db, "player"), first, last, id, team_abbrev, rl)
Error in result_create(conn@ptr, statement) : no such table: player
> pitchers <- as.data.frame(pitchers)
Error in as.data.frame(pitchers) : object 'pitchers' not found
> pitchers <- as.data.frame(pitchers)
Error in as.data.frame(pitchers) : object 'pitchers' not found
> pitchers <- pitchers[!duplicated(pitchers$id),]
Error: object 'pitchers' not found
> 
> stadium <- select(tbl(my_db, "game"), original_date, home_team_name, gameday_link)  
Error in result_create(conn@ptr, statement) : no such table: game
> 
> #merge dataframes together
> merge <- merge(locations, batters, by.x="batter", by.y="id")
Error in merge(locations, batters, by.x = "batter", by.y = "id") : 
  object 'locations' not found
> 
> merge2 <- merge(merge, pitchers, by.x="pitcher", by.y="id")

我了解到该问题与数据路径(URL)变更有关,解决方法是从GitHub安装最新版本的库,但这并没有解决我的问题。如何让该脚本适用于当前赛季的数据?

0 个答案:

没有答案