我想从足球赛结果中创建R中的最后5(x)个比赛表。这是我目前所处的例子。
加载数据
#Last seasons premier league results
S1516 <- read.csv("http://www.football-data.co.uk/mmz4281/1516/E0.csv")
#transform Date to date value
S1516$Date <- as.Date(S1516$Date, format="%d/%m/%y")
这里我创建了函数,以特定日期创建联赛表,以这两个帖子为例。 http://opisthokonta.net/?p=18和https://learnr.wordpress.com/2009/03/19/data-manipulation-football-league-table/
这是一个示例,我当前的league.table
函数将给出的内容。所有19场比赛一直持续到#31; 2015-31-12&#34;计算,但我想每个团队只有最后5场比赛。任何有关修改当前函数的帮助都是相关的。
> head(league.table(S1516, "2015-12-31"), 10)
PLD HW HD HL AW AD AL GF GA GD PTS
Arsenal 19 6 2 1 6 1 3 33 18 15 39
Leicester 19 5 3 1 6 3 1 37 25 12 39
Man City 19 8 0 2 3 3 3 37 20 17 36
Tottenham 19 5 4 1 4 4 1 33 15 18 35
Crystal Palace 19 4 2 4 5 2 2 23 16 7 31
Man United 19 4 4 1 4 2 4 22 16 6 30
Liverpool 19 4 3 2 4 3 3 22 22 0 30
West Ham 19 3 4 2 4 4 2 28 23 5 29
Watford 19 4 2 4 4 3 2 24 20 4 29
Stoke 19 4 1 4 4 4 2 20 19 1 29
我目前的联赛表功能:
league.table <- function(df, fdate="2100-01-01"){
df <- subset(df, Date <= fdate)
#points awarded for a match outcome
winPts <- 3
drawPts <- 1
loosePts <- 0
numMatches <- length(df$HomeTeam)
teams <- levels(factor(c(as.character(df$HomeTeam), as.character(df$AwayTeam))))
numTeams <- length(teams)
#vector with outcome of a match (H, D or A)
home <- df$FTHG > df$FTAG
away <- df$FTAG > df$FTHG
draws <- df$FTHG == df$FTAG
results <- character(length(df$FTHG))
results[draws] <- "D"
results[home] <- "H"
results[away] <- "A"
#for output
homeWins <- numeric(numTeams)
homeDraws <- numeric(numTeams)
homeLoss <- numeric(numTeams)
awayWins <- numeric(numTeams)
awayDraws <- numeric(numTeams)
awayLoss <- numeric(numTeams)
goalsFor <- numeric(numTeams)
goalsAgainst <- numeric(numTeams)
goalsDifference <- numeric(numTeams)
playedMatches <- numeric(numTeams)
pts <- numeric(numTeams)
for (t in 1:numTeams) {
#mathc results for a given team
homeResults <- results[df$HomeTeam == teams[t]]
awayResults <- results[df$AwayTeam == teams[t]]
playedMatches[t] <- length(homeResults) + length(awayResults)
goalsForH <- sum(df$FTHG[df$HomeTeam == teams[t]])
goalsForA <- sum(df$FTAG[df$AwayTeam == teams[t]])
goalsFor[t] <- goalsForA + goalsForH
goalsAgainstH <- sum(df$FTAG[df$HomeTeam == teams[t]])
goalsAgainstA <- sum(df$FTHG[df$AwayTeam == teams[t]])
goalsAgainst[t] <- goalsAgainstA + goalsAgainstH
goalsDifference[t] <- goalsFor[t] - goalsAgainst[t]
homeWins[t] <- sum(homeResults == "H")
homeDraws[t] <- sum(homeResults == "D")
homeLoss[t] <- sum(homeResults == "A")
awayWins[t] <- sum(awayResults == "A")
awayDraws[t] <- sum(awayResults == "D")
awayLoss[t] <- sum(awayResults == "H")
totWins <- homeWins[t] + awayWins[t]
totDraws <- homeDraws[t] + awayDraws[t]
totLoss <- homeLoss[t] + awayLoss[t]
pts[t] <- (winPts * totWins) + (drawPts * totDraws) + (loosePts * totLoss)
}
table <- data.frame(cbind(playedMatches, homeWins, homeDraws,
homeLoss, awayWins, awayDraws, awayLoss,
goalsFor, goalsAgainst, goalsDifference, pts),
row.names=teams)
names(table) <- c("PLD", "HW", "HD", "HL", "AW", "AD", "AL", "GF", "GA", "GD", "PTS")
ord <- order(-table$PTS, -table$GD, -table$GF)
table <- table[ord, ]
return(table)
}
答案 0 :(得分:0)
试一试。
我对您的代码进行了一处更改:
S1516 <- read.csv("http://www.football-data.co.uk/mmz4281/1516/E0.csv", stringsAsFactors = TRUE)
以下功能将根据最后日期和最后一个游戏数量构建新数据集。它可能会更有效,但它解决了这个问题。
leagueInfo <- function(data, lastDate, lastNumberOfGames){
# Create a list of teams
teams <- sort(unique(data$HomeTeam))
# Find the last N dates that each team played.
bFirst <- TRUE
for (team in teams)
{
tempSubset <- subset(data, data$HomeTeam == team | data$AwayTeam == team)
theDates <- sort(unique(tempSubset$Date))
lastNDates <- tail(subset(theDates, theDates<=lastDate), n=lastNumberOfGames)
# Data for this team for the last N dates.
tempSubset2 <- subset(tempSubset, tempSubset$Date %in% lastNDates)
# Row bind each subset to a new data set.
if (bFirst)
{
newData <- tempSubset2
bFirst <- FALSE
}
else
{
newData <- rbind(tempSubset2, newData)
}
}
# Remove duplicates
newData<-unique(newData)
return(newData)
}
运行以下命令:
# Last 5 games played until 12/31/2015
lastDate <-"2015-12-31"
lastNumberOfGames <- 5
newData <- leagueInfo(S1516, lastDate, lastNumberOfGames)
league.table(newData, lastDate )
结果:
PLD HW HD HL AW AD AL GF GA GD PTS
Arsenal 5 3 0 0 1 0 1 9 6 3 12
Tottenham 5 1 0 1 2 1 0 9 4 5 10
Watford 5 2 0 1 1 1 0 9 4 5 10
Stoke 5 2 0 1 1 1 0 9 5 4 10
Leicester 5 1 1 0 2 0 1 8 4 4 10
Bournemouth 5 1 1 0 2 0 1 5 4 1 10
Crystal Palace 5 1 1 0 1 2 0 4 2 2 9
Man City 5 2 0 0 0 1 2 7 6 1 7
Newcastle 5 1 1 1 1 0 1 5 4 1 7
West Ham 5 1 1 0 0 3 0 3 2 1 7
Norwich 5 1 1 0 1 0 2 5 7 -2 7
Liverpool 5 1 1 0 1 0 2 4 7 -3 7
Chelsea 5 1 1 1 0 1 1 6 6 0 5
Everton 5 0 1 2 1 1 0 8 9 -1 5
West Brom 5 1 1 1 0 1 1 5 6 -1 5
Swansea 5 1 1 1 0 1 1 2 5 -3 5
Southampton 5 1 1 1 0 0 2 6 6 0 4
Aston Villa 5 0 1 1 0 2 1 3 7 -4 3
Man United 5 0 2 1 0 0 2 2 6 -4 2
Sunderland 5 0 0 2 0 0 3 3 12 -9 0