我是R的新手。我正在尝试让我的函数在用户输入日期(mm / dd / yy)和季节(yyyy / yy)时输出一个表格,其中包含我在我的函数中指定的列。我对如何设置我的初始数据框感到困惑。
所以我有3个季节(3个网址)。我的IF语句应该读入一个df(称为dfmess,因为它是一团糟)。我该怎么做,希望这是有道理的,有人可以帮助我。如果有人能为我解决这个烂摊子,我将非常感激......
Season_2015_16 <- read.csv("http://www.football-data.co.uk/mmz4281/1516/E0.csv", stringsAsFactors = FALSE)
Season_2014_15 <- read.csv("http://www.football-data.co.uk/mmz4281/1415/E0.csv", stringsAsFactors = FALSE)
Season_2013_14 <- read.csv("http://www.football-data.co.uk/mmz4281/1314/E0.csv", stringsAsFactors = FALSE)
**dfmess <- read.csv("http://www.football-data.co.uk/mmz4281/1516/E0.csv", stringsAsFactors = FALSE)
dfmess <- read.csv("http://www.football-data.co.uk/mmz4281/1415/E0.csv", stringsAsFactors = FALSE)
dfmess <- read.csv("http://www.football-data.co.uk/mmz4281/1314/E0.csv", stringsAsFactors = FALSE)**
EPL_Standings <- function(Standingdate, season){
Standingdate <- as.Date(Standingdate, format = "%m/%d/%Y")
Standingdate <- as.Date("08/30/15", format = "%m/%d/%y")
if (season == "2015/16"){
read.csv("http://www.football-data.co.uk/mmz4281/1516/E0.csv", stringsAsFactors = FALSE) -> dfmess
} else if (season == "2014/15") {
read.csv("http://www.football-data.co.uk/mmz4281/1415/E0.csv", stringsAsFactors = FALSE) -> dfmess
} else if (season == "2013/14"){
read.csv("http://www.football-data.co.uk/mmz4281/1314/E0.csv", stringsAsFactors = FALSE) -> dfmess
}
dfmess[dfmess$Date <= Standingdate, 2:6]
#Team Record
ddply(dfmess,
.(HomeTeam)
, summarize
, win = sum(FTR == "H")
, Loss = sum(FTR == "A")
, Draws = sum(FTR == "D")
) -> homewins1 #Homerecord
ddply(dfmess,
.(AwayTeam)
, summarize
, win = sum(FTR == "A")
, Loss = sum(FTR == "H")
, Draws = sum(FTR == "D")
) -> awaywins1 #Awayrecord
merge(homewins1, awaywins1, by.x = "HomeTeam", by.y = "AwayTeam", all = TRUE) -> homeawayrec
#homeawayrec
(homeawayrec$win.x + homeawayrec$win.y) -> homeawayrec$totalwins #adds away and home wins together, then puts on table. renamed column later
(homeawayrec$Loss.x + homeawayrec$Loss.y) -> homeawayrec$Total.Losses #same as above, but Losses. Named consistently with what I changed too after I changed above column.
(homeawayrec$Draws.x + homeawayrec$Draws.y) -> homeawayrec$Total.Draws #same as above, but with Draws.
#names(homeawayrec) <- c("Teams","Home.Wins", "Home.Losses", "Home.Draws", "Away.Wins", "Away.Losses", "Away.Draws", "Total.Wins") #Name all columns. make sure name right thing.
#homeawayrec[, c("Teams", "Total.Wins", "Total.Draws", "Total.Losses")] -> WDL
#Need to make home and away record, should be easy.
#homewins1 #homerecord
#awaywins1 #awayrecord
#Matches Played
(homeawayrec$totalwins + homeawayrec$Total.Draws + homeawayrec$Total.Losses) -> homeawayrec$MatchesPlayed
#homeawayrec
#homeawayrec[, c("Teams", "MatchesPlayed")] -> Matches.Played
#Matches.Played #number of matches played
#POINTS (need to make it per match)
#ddply(dfmess,
# .(HomeTeam) #DDPLY Points was eliminated because multiplying the wins by 3 and draws by 1 was easier.
# , summarize
# , win = 3*(sum(FTR == "H"))
# , Loss = 0*(sum(FTR == "A"))
# , Draws = 1*(sum(FTR == "D"))
#) -> HomePoints #team points at home
#ddply(dfmess,
# .(AwayTeam)
# , summarize
# , win = 3*(sum(FTR == "A"))
# , Loss = 0*(sum(FTR == "H"))
# , Draws = 1*(sum(FTR == "D"))
#) -> AwayPoints #team points on road
#merge(HomePoints, AwayPoints, by.x = "HomeTeam", by.y = "AwayTeam", all = TRUE) -> Pointshomeaway
#(HomePoints$win +HomePoints$Draws + AwayPoints$win + AwayPoints$Draws) -> Pointshomeaway$TotalPoints
#names(Pointshomeaway) <- c("Teams", "HomeWinPoints", "HomeLossPoints", "HomeDrawPoints", "AwayWinPoints", "AwayLossPoints", "AwayDrawPoints", "TotalPoints")
#Pointshomeaway[, c("Teams", "TotalPoints")] -> Points.standings #table with just total points, total points standings.
#homeawayrec
(3*(homeawayrec$totalwins)+(1*(homeawayrec$Total.Draws))) -> homeawayrec$Points
#points per match and point percentage %
#homeawayrec
(3*(homeawayrec$totalwins)+(1*(homeawayrec$Total.Draws)))/(homeawayrec$MatchesPlayed) -> homeawayrec$PPM
#homeawayrec[, c("Teams", "PointsPerMatch")] -> PointsPer.Standings #standings with teams and points per match.
#Point Percentage
(homeawayrec$Points)/(3*(homeawayrec$MatchesPlayed)) -> homeawayrec$PtPct
#homeawayrec
#Goals Scored
ddply(dfmess, #THIS WORKED, make sure HomeTeam and FTHG/AwayTeam and FTAG match up. Goals allowed will be the inverse for each.
.(HomeTeam)
, summarize
, goalsscored1 = sum(FTHG)
) -> Homegoalmade1
ddply(dfmess,
.(AwayTeam)
, summarize
, goalsscored1 = sum(FTAG)
) -> Awaygoalsmade1
merge(Homegoalmade1, Awaygoalsmade1, by.x = "HomeTeam", by.y = "AwayTeam", all = TRUE) -> goalsmade1 #merged home/away goalsmmade1 THIS WORKED
(goalsmade1$goalsscored1.x + goalsmade1$goalsscored1.y) -> goalsmade1$allgoalsmade1 #total goalsmade
#goalsmade1
goalsmade1[, c("HomeTeam", "allgoalsmade1")] -> GS
merge(homeawayrec, GS, by.x = "HomeTeam", by.y = "HomeTeam", all = TRUE) -> homeawayrec1
#homeawayrec
#homeawayrec1
#Goals Scored Per Match
((homeawayrec1$allgoalsmade1)/(homeawayrec1$MatchesPlayed)) -> homeawayrec1$GSM
#homeawayrec1
#Goals Allowed
ddply(dfmess, #THIS WORKED, make sure HomeTeam and FTAG/AwayTeam and FTHG match up. Goals scored will be the inverse for each.
.(HomeTeam)
, summarize
, goalsscored1 = sum(FTAG)
) -> Homegoalallowed1
ddply(dfmess,
.(AwayTeam)
, summarize
, goalsscored1 = sum(FTHG)
) -> Awaygoalsallowed1
merge(Homegoalallowed1, Awaygoalsallowed1, by.x = "HomeTeam", by.y = "AwayTeam", all = TRUE) -> goalsallowed1
(goalsallowed1$goalsscored1.x + goalsallowed1$goalsscored1.y) -> goalsallowed1$GA
#goalsallowed1
goalsallowed1[, c("HomeTeam", "allgoalsallowed1")] -> GoalsAllowedall
merge(homeawayrec1, goalsallowed1, by.x = "HomeTeam", by.y = "HomeTeam", all = TRUE) -> homeawayrec2
#Goals Allowed Per Match
((homeawayrec2$GA)/homeawayrec2$MatchesPlayed) -> homeawayrec2$GAM
names(homeawayrec2)
#putting columns in correct order
#if I rename columns the function gets confused and won't work?
homeawayrec2[, c(1, 8, 10, 9, 2, 4, 3, 5, 7, 6, 11, 12, 13, 14, 15, 16, 19, 20)]
#Last 10 matches played
((tail(homeawayrec2$totalwins, n=10)) (tail(homeawayrec2$Total.Draws, n=10)) (tail(homeawayrec2$Total.Losses, n=10)))
tail(homeawayrec2[, c("totalwins", "Total.Draws", "Total.Losses")], n=10)
homeawayrec2
print(homeawayrec2)
return(dfmess)
}
((homeawayrec2$GA)/homeawayrec2$MatchesPlayed) -> homeawayrec2$GAM
names(homeawayrec2)
homeawayrec2[, c(1, 8, 10, 9, 2, 4, 3, 5, 7, 6, 11, 12, 13, 14, 15, 16, 19, 20)]
print(homeawayrec2)
return(dfmess)
}
A <- EPL_Standings("09/26/14", "2013/14") #example numbers, placeholders.
head(A)
答案 0 :(得分:0)
这是一个开始。通常,最好将相似的数据保存在同一个表中。
library(dplyr)
library(lubridate)
library(magrittr)
data =
data_frame(start_season = c(2013, 2014, 2015)) %>%
mutate(url =
(start_season - 2000) %>%
paste0(., . + 1) %>%
sprintf("http://www.football-data.co.uk/mmz4281/%s/E0.csv", . ) ) %>%
group_by(start_season) %>%
do(read.csv(.$url)) %>%
mutate(Date = dmy(Date) %>% as.Date) %>%
arrange(Date)
cutoffs =
data %>%
group_by(start_season) %>%
summarize(start_date = first(Date))
EPL_Standings = function(specific_date) {
specific_date = mdy(specific_date) %>% as.Date
this_season_so_far =
cutoffs %>%
filter(specific_date > start_date) %>%
last %>%
left_join(data) %>%
filter(Date <= specific_date)
# some sort of processing here
}