与R的橄榄球形式桌

时间:2016-08-19 09:47:45

标签: r data-manipulation

我想从足球赛结果中创建R中的最后5(x)个比赛表。这是我目前所处的例子。

加载数据

#Last seasons premier league results
S1516 <- read.csv("http://www.football-data.co.uk/mmz4281/1516/E0.csv")

#transform Date to date value
S1516$Date <- as.Date(S1516$Date, format="%d/%m/%y")

这里我创建了函数,以特定日期创建联赛表,以这两个帖子为例。 http://opisthokonta.net/?p=18https://learnr.wordpress.com/2009/03/19/data-manipulation-football-league-table/

这是一个示例,我当前的league.table函数将给出的内容。所有19场比赛一直持续到#31; 2015-31-12&#34;计算,但我想每个团队只有最后5场比赛。任何有关修改当前函数的帮助都是相关的。

> head(league.table(S1516, "2015-12-31"), 10)
               PLD HW HD HL AW AD AL GF GA GD PTS
Arsenal         19  6  2  1  6  1  3 33 18 15  39
Leicester       19  5  3  1  6  3  1 37 25 12  39
Man City        19  8  0  2  3  3  3 37 20 17  36
Tottenham       19  5  4  1  4  4  1 33 15 18  35
Crystal Palace  19  4  2  4  5  2  2 23 16  7  31
Man United      19  4  4  1  4  2  4 22 16  6  30
Liverpool       19  4  3  2  4  3  3 22 22  0  30
West Ham        19  3  4  2  4  4  2 28 23  5  29
Watford         19  4  2  4  4  3  2 24 20  4  29
Stoke           19  4  1  4  4  4  2 20 19  1  29

我目前的联赛表功能:

league.table <- function(df, fdate="2100-01-01"){

  df <- subset(df, Date <= fdate)

  #points awarded for a match outcome  
  winPts <- 3
  drawPts <- 1
  loosePts <- 0

  numMatches <- length(df$HomeTeam)

  teams <- levels(factor(c(as.character(df$HomeTeam), as.character(df$AwayTeam))))
  numTeams <- length(teams)

  #vector with outcome of a match (H, D or A)
  home <- df$FTHG > df$FTAG 
  away <- df$FTAG  > df$FTHG
  draws <- df$FTHG == df$FTAG 

  results <- character(length(df$FTHG))
  results[draws] <- "D"
  results[home] <- "H"
  results[away] <- "A"

  #for output
  homeWins <- numeric(numTeams)
  homeDraws <- numeric(numTeams)
  homeLoss <- numeric(numTeams)
  awayWins <- numeric(numTeams)
  awayDraws <- numeric(numTeams)
  awayLoss <- numeric(numTeams)
  goalsFor <- numeric(numTeams)
  goalsAgainst <- numeric(numTeams)
  goalsDifference <- numeric(numTeams)
  playedMatches <- numeric(numTeams)
  pts <- numeric(numTeams)

  for (t in 1:numTeams) {
    #mathc results for a given team
    homeResults <- results[df$HomeTeam == teams[t]]
    awayResults <- results[df$AwayTeam == teams[t]]

    playedMatches[t] <- length(homeResults) + length(awayResults)

    goalsForH <- sum(df$FTHG[df$HomeTeam == teams[t]])
    goalsForA <- sum(df$FTAG[df$AwayTeam == teams[t]])
    goalsFor[t] <- goalsForA + goalsForH
    goalsAgainstH <- sum(df$FTAG[df$HomeTeam == teams[t]])
    goalsAgainstA <- sum(df$FTHG[df$AwayTeam == teams[t]])
    goalsAgainst[t] <- goalsAgainstA + goalsAgainstH
    goalsDifference[t] <- goalsFor[t] - goalsAgainst[t]

    homeWins[t] <- sum(homeResults == "H")
    homeDraws[t] <- sum(homeResults == "D")
    homeLoss[t] <- sum(homeResults == "A")
    awayWins[t] <- sum(awayResults == "A")
    awayDraws[t] <- sum(awayResults == "D")
    awayLoss[t] <- sum(awayResults == "H")

    totWins <- homeWins[t] + awayWins[t]
    totDraws <- homeDraws[t] + awayDraws[t]
    totLoss <- homeLoss[t] + awayLoss[t]

    pts[t] <- (winPts * totWins) + (drawPts * totDraws) + (loosePts * totLoss)

  }

  table <- data.frame(cbind(playedMatches, homeWins, homeDraws, 
                            homeLoss, awayWins, awayDraws, awayLoss, 
                            goalsFor, goalsAgainst, goalsDifference, pts),
                      row.names=teams)


  names(table) <- c("PLD", "HW", "HD", "HL", "AW", "AD", "AL", "GF", "GA", "GD", "PTS")
  ord <- order(-table$PTS, -table$GD, -table$GF)
  table <- table[ord, ]

  return(table)

}

1 个答案:

答案 0 :(得分:0)

试一试。

我对您的代码进行了一处更改:

S1516 <- read.csv("http://www.football-data.co.uk/mmz4281/1516/E0.csv", stringsAsFactors = TRUE)

以下功能将根据最后日期和最后一个游戏数量构建新数据集。它可能会更有效,但它解决了这个问题。

leagueInfo <- function(data, lastDate, lastNumberOfGames){
    # Create a list of teams
    teams <- sort(unique(data$HomeTeam))

    # Find the last N dates that each team played.
    bFirst <- TRUE
    for (team in teams)
    {
        tempSubset <- subset(data, data$HomeTeam == team | data$AwayTeam == team)

        theDates <- sort(unique(tempSubset$Date))
        lastNDates <- tail(subset(theDates, theDates<=lastDate), n=lastNumberOfGames)

        # Data for this team for the last N dates.
        tempSubset2 <- subset(tempSubset, tempSubset$Date %in% lastNDates)

        # Row bind each subset to a new data set.
        if (bFirst)
        {
            newData <- tempSubset2
            bFirst <- FALSE
        }
        else
        {
            newData <- rbind(tempSubset2, newData)
        }
    }
    # Remove duplicates
    newData<-unique(newData)
    return(newData)
}

运行以下命令:

# Last 5 games played until 12/31/2015
lastDate <-"2015-12-31"
lastNumberOfGames <- 5
newData <- leagueInfo(S1516, lastDate, lastNumberOfGames)
league.table(newData, lastDate )

结果:

               PLD HW HD HL AW AD AL GF GA GD PTS
Arsenal          5  3  0  0  1  0  1  9  6  3  12
Tottenham        5  1  0  1  2  1  0  9  4  5  10
Watford          5  2  0  1  1  1  0  9  4  5  10
Stoke            5  2  0  1  1  1  0  9  5  4  10
Leicester        5  1  1  0  2  0  1  8  4  4  10
Bournemouth      5  1  1  0  2  0  1  5  4  1  10
Crystal Palace   5  1  1  0  1  2  0  4  2  2   9
Man City         5  2  0  0  0  1  2  7  6  1   7
Newcastle        5  1  1  1  1  0  1  5  4  1   7
West Ham         5  1  1  0  0  3  0  3  2  1   7
Norwich          5  1  1  0  1  0  2  5  7 -2   7
Liverpool        5  1  1  0  1  0  2  4  7 -3   7
Chelsea          5  1  1  1  0  1  1  6  6  0   5
Everton          5  0  1  2  1  1  0  8  9 -1   5
West Brom        5  1  1  1  0  1  1  5  6 -1   5
Swansea          5  1  1  1  0  1  1  2  5 -3   5
Southampton      5  1  1  1  0  0  2  6  6  0   4
Aston Villa      5  0  1  1  0  2  1  3  7 -4   3
Man United       5  0  2  1  0  0  2  2  6 -4   2
Sunderland       5  0  0  2  0  0  3  3 12 -9   0