函数if if语句从不同的数据集读取

时间:2015-10-06 04:26:54

标签: r

我是R的新手。我正在尝试让我的函数在用户输入日期(mm / dd / yy)和季节(yyyy / yy)时输出一个表格,其中包含我在我的函数中指定的列。我对如何设置我的初始数据框感到困惑。

所以我有3个季节(3个网址)。我的IF语句应该读入一个df(称为dfmess,因为它是一团糟)。我该怎么做,希望这是有道理的,有人可以帮助我。如果有人能为我解决这个烂摊子,我将非常感激......

    Season_2015_16 <- read.csv("http://www.football-data.co.uk/mmz4281/1516/E0.csv", stringsAsFactors = FALSE)
    Season_2014_15 <- read.csv("http://www.football-data.co.uk/mmz4281/1415/E0.csv", stringsAsFactors = FALSE)
    Season_2013_14 <- read.csv("http://www.football-data.co.uk/mmz4281/1314/E0.csv", stringsAsFactors = FALSE)

    **dfmess <- read.csv("http://www.football-data.co.uk/mmz4281/1516/E0.csv", stringsAsFactors = FALSE)
    dfmess <- read.csv("http://www.football-data.co.uk/mmz4281/1415/E0.csv", stringsAsFactors = FALSE)
    dfmess <- read.csv("http://www.football-data.co.uk/mmz4281/1314/E0.csv", stringsAsFactors = FALSE)**

    EPL_Standings <- function(Standingdate, season){
      Standingdate <- as.Date(Standingdate, format = "%m/%d/%Y")
      Standingdate <- as.Date("08/30/15", format = "%m/%d/%y")
      if (season == "2015/16"){

        read.csv("http://www.football-data.co.uk/mmz4281/1516/E0.csv", stringsAsFactors = FALSE) -> dfmess

      } else if (season == "2014/15") {
        read.csv("http://www.football-data.co.uk/mmz4281/1415/E0.csv", stringsAsFactors = FALSE) -> dfmess

      } else if (season == "2013/14"){
        read.csv("http://www.football-data.co.uk/mmz4281/1314/E0.csv", stringsAsFactors = FALSE) -> dfmess

      }

    dfmess[dfmess$Date <= Standingdate, 2:6]
  #Team Record


  ddply(dfmess, 
        .(HomeTeam)
        , summarize
        , win = sum(FTR == "H")
        , Loss = sum(FTR == "A")
        , Draws = sum(FTR == "D")
  ) -> homewins1    #Homerecord
  ddply(dfmess, 
        .(AwayTeam)
        , summarize
        , win = sum(FTR == "A")
        , Loss = sum(FTR == "H")
        , Draws = sum(FTR == "D")
  ) -> awaywins1  #Awayrecord

  merge(homewins1, awaywins1, by.x = "HomeTeam", by.y = "AwayTeam", all = TRUE) -> homeawayrec

  #homeawayrec
  (homeawayrec$win.x + homeawayrec$win.y) -> homeawayrec$totalwins #adds away and home wins together, then puts on table. renamed column later
  (homeawayrec$Loss.x + homeawayrec$Loss.y) -> homeawayrec$Total.Losses #same as above, but Losses. Named consistently with what I changed too after I changed above column. 
  (homeawayrec$Draws.x + homeawayrec$Draws.y) -> homeawayrec$Total.Draws #same as above, but with Draws. 
  #names(homeawayrec) <- c("Teams","Home.Wins", "Home.Losses", "Home.Draws", "Away.Wins", "Away.Losses", "Away.Draws", "Total.Wins") #Name all columns. make sure name right thing. 

  #homeawayrec[, c("Teams", "Total.Wins", "Total.Draws", "Total.Losses")] -> WDL

  #Need to make home and away record, should be easy. 

  #homewins1 #homerecord
  #awaywins1 #awayrecord

  #Matches Played

  (homeawayrec$totalwins + homeawayrec$Total.Draws + homeawayrec$Total.Losses) -> homeawayrec$MatchesPlayed
  #homeawayrec

  #homeawayrec[, c("Teams", "MatchesPlayed")] -> Matches.Played
  #Matches.Played #number of matches played
  #POINTS (need to make it per match)

  #ddply(dfmess, 
  # .(HomeTeam)                    #DDPLY Points was eliminated because multiplying the wins by 3 and draws by 1 was easier. 
  #  , summarize
  #   , win = 3*(sum(FTR == "H"))
  #    , Loss = 0*(sum(FTR == "A"))
  #     , Draws = 1*(sum(FTR == "D"))
  #) -> HomePoints   #team points at home

  #ddply(dfmess, 
  #     .(AwayTeam)
  #    , summarize
  #   , win = 3*(sum(FTR == "A"))
  #  , Loss = 0*(sum(FTR == "H"))
  #      , Draws = 1*(sum(FTR == "D"))
  #) -> AwayPoints   #team points on road

  #merge(HomePoints, AwayPoints, by.x = "HomeTeam", by.y = "AwayTeam", all = TRUE) -> Pointshomeaway

  #(HomePoints$win +HomePoints$Draws + AwayPoints$win + AwayPoints$Draws) -> Pointshomeaway$TotalPoints

  #names(Pointshomeaway) <- c("Teams", "HomeWinPoints", "HomeLossPoints", "HomeDrawPoints", "AwayWinPoints", "AwayLossPoints", "AwayDrawPoints", "TotalPoints")

  #Pointshomeaway[, c("Teams", "TotalPoints")] -> Points.standings #table with just total points, total points standings. 
  #homeawayrec
  (3*(homeawayrec$totalwins)+(1*(homeawayrec$Total.Draws))) -> homeawayrec$Points

  #points per match and point percentage %
  #homeawayrec
  (3*(homeawayrec$totalwins)+(1*(homeawayrec$Total.Draws)))/(homeawayrec$MatchesPlayed) -> homeawayrec$PPM

  #homeawayrec[, c("Teams", "PointsPerMatch")] -> PointsPer.Standings #standings with teams and points per match. 

  #Point Percentage
  (homeawayrec$Points)/(3*(homeawayrec$MatchesPlayed)) -> homeawayrec$PtPct
  #homeawayrec
  #Goals Scored

  ddply(dfmess,                           #THIS WORKED, make sure HomeTeam and FTHG/AwayTeam and FTAG match up. Goals allowed will be the inverse for each. 
        .(HomeTeam)
        , summarize
        , goalsscored1 = sum(FTHG)
  ) -> Homegoalmade1
  ddply(dfmess, 
        .(AwayTeam)
        , summarize
        , goalsscored1 = sum(FTAG)
  ) -> Awaygoalsmade1

  merge(Homegoalmade1, Awaygoalsmade1, by.x = "HomeTeam", by.y = "AwayTeam", all = TRUE) -> goalsmade1 #merged home/away goalsmmade1 THIS WORKED
  (goalsmade1$goalsscored1.x + goalsmade1$goalsscored1.y) -> goalsmade1$allgoalsmade1 #total goalsmade

  #goalsmade1

  goalsmade1[, c("HomeTeam", "allgoalsmade1")] -> GS

  merge(homeawayrec, GS, by.x = "HomeTeam", by.y = "HomeTeam", all = TRUE) -> homeawayrec1
  #homeawayrec
  #homeawayrec1

  #Goals Scored Per Match
  ((homeawayrec1$allgoalsmade1)/(homeawayrec1$MatchesPlayed)) -> homeawayrec1$GSM
  #homeawayrec1

  #Goals Allowed
  ddply(dfmess,                           #THIS WORKED, make sure HomeTeam and FTAG/AwayTeam and FTHG match up. Goals scored will be the inverse for each. 
        .(HomeTeam)
        , summarize
        , goalsscored1 = sum(FTAG)
  ) -> Homegoalallowed1
  ddply(dfmess, 
        .(AwayTeam)
        , summarize
        , goalsscored1 = sum(FTHG)
  ) -> Awaygoalsallowed1

  merge(Homegoalallowed1, Awaygoalsallowed1, by.x = "HomeTeam", by.y = "AwayTeam", all = TRUE) -> goalsallowed1

  (goalsallowed1$goalsscored1.x + goalsallowed1$goalsscored1.y) -> goalsallowed1$GA

  #goalsallowed1

  goalsallowed1[, c("HomeTeam", "allgoalsallowed1")] -> GoalsAllowedall

  merge(homeawayrec1, goalsallowed1, by.x = "HomeTeam", by.y = "HomeTeam", all = TRUE) -> homeawayrec2

  #Goals Allowed Per Match

  ((homeawayrec2$GA)/homeawayrec2$MatchesPlayed) -> homeawayrec2$GAM
  names(homeawayrec2)
  #putting columns in correct order
  #if I rename columns the function gets confused and won't work?
  homeawayrec2[, c(1, 8, 10, 9, 2, 4, 3, 5, 7, 6, 11, 12, 13, 14, 15, 16, 19, 20)]
  #Last 10 matches played
  ((tail(homeawayrec2$totalwins, n=10)) (tail(homeawayrec2$Total.Draws, n=10)) (tail(homeawayrec2$Total.Losses, n=10)))
  tail(homeawayrec2[, c("totalwins", "Total.Draws", "Total.Losses")], n=10)
  homeawayrec2
  print(homeawayrec2)
  return(dfmess)
}
      ((homeawayrec2$GA)/homeawayrec2$MatchesPlayed) -> homeawayrec2$GAM
      names(homeawayrec2)
      homeawayrec2[, c(1, 8, 10, 9, 2, 4, 3, 5, 7, 6, 11, 12, 13, 14, 15, 16, 19, 20)]
      print(homeawayrec2)
      return(dfmess)
    }
    A <- EPL_Standings("09/26/14", "2013/14") #example numbers, placeholders. 
    head(A)

1 个答案:

答案 0 :(得分:0)

这是一个开始。通常,最好将相似的数据保存在同一个表中。

library(dplyr)
library(lubridate)
library(magrittr)

data =
  data_frame(start_season = c(2013, 2014, 2015)) %>%
  mutate(url =
           (start_season - 2000) %>%
           paste0(., . + 1) %>%
           sprintf("http://www.football-data.co.uk/mmz4281/%s/E0.csv", . ) ) %>%
  group_by(start_season) %>%
  do(read.csv(.$url)) %>%
  mutate(Date = dmy(Date) %>% as.Date) %>%
  arrange(Date)

cutoffs =
  data %>%
  group_by(start_season) %>%
  summarize(start_date = first(Date))

EPL_Standings = function(specific_date) {

  specific_date = mdy(specific_date) %>% as.Date

  this_season_so_far = 
    cutoffs %>%
    filter(specific_date > start_date) %>%
    last %>%
    left_join(data) %>%
    filter(Date <= specific_date)

  # some sort of processing here
}