我有一个包含以下列的数据框:
我需要针对每个home_lineup和每个awaylineup,分别计算其在各场比赛中home_Plusminus和Away_Plusminus的总和。
数据如下:
game_id home_lineup awaylineup home_Plusminus Away_Plusminus home_team away_team
12345 L1 L2 -2 2 BOS ATL
12345 L3 L4 3 -3 BOS ATL
12345 L3 L4 3 -3 BOS ATL
45678 L2 L1 3 -3 ATL BOS
45678 L2 L7 1 -1 ATL BOS
45678 L8 L1 3 -3 ATL BOS
以上数据显示有2场比赛。
我希望最终输出看起来像这样:
Team Lineup PlusMinus Pergame
BOS L1 -8 -4.0
BOS L3 6 6.0
BOS L7 -1 -1.0
ATL L2 6 3.0
ATL L4 -6 -6.0
ATL L8 3 3.0
因此,在上面的示例中,L1参加了两场比赛,总正负值为-8。L3只参加了一场比赛。
答案 0 :(得分:1)
这是使用tidyr和dplyr的方法。
library(tidyr); library(dplyr)
# Step 1 - reshape into a tidy data frame: one row per (stint, side).
# `contains()` matches case-insensitively by default, so "home" also selects
# `home_Plusminus` and "away" also selects `Away_Plusminus` and `awaylineup`.
home <- df %>%
  select(game_id, contains("home")) %>%
  rename(Lineup = home_lineup, Team = home_team, plusminus = home_Plusminus)
away <- df %>%
  select(game_id, contains("away")) %>%
  rename(Lineup = awaylineup, Team = away_team, plusminus = Away_Plusminus)
# Name the inputs so that `location` is labelled "home"/"away" instead of the
# positional "1"/"2" that bind_rows() falls back to for unnamed arguments.
tidy <- bind_rows(home = home, away = away, .id = "location")

# Step 2 - summarize: total plus/minus per team and lineup, and the average
# per distinct game that the lineup appeared in.
output <- tidy %>%
  group_by(Team, Lineup) %>%
  summarize(
    PlusMinus = sum(plusminus),
    PerGame = PlusMinus / n_distinct(game_id)
  ) %>%
  ungroup()
输出:
> output
# A tibble: 6 x 4
Team Lineup PlusMinus PerGame
<chr> <chr> <int> <dbl>
1 ATL L2 6 3
2 ATL L4 -6 -6
3 ATL L8 3 3
4 BOS L1 -8 -4
5 BOS L3 6 6
6 BOS L7 -1 -1
样本数据:
# Sample data from the question, parsed from whitespace-separated text with a
# header row. Spell out TRUE/FALSE: the T/F shorthands are ordinary variables
# that can be reassigned.
df <- read.table(header = TRUE, stringsAsFactors = FALSE, text = "
game_id home_lineup awaylineup home_Plusminus Away_Plusminus home_team away_team
12345 L1 L2 -2 2 BOS ATL
12345 L3 L4 3 -3 BOS ATL
12345 L3 L4 3 -3 BOS ATL
45678 L2 L1 3 -3 ATL BOS
45678 L2 L7 1 -1 ATL BOS
45678 L8 L1 3 -3 ATL BOS")
答案 1 :(得分:1)
与乔恩(Jon)的解决方案类似:
library(tidyverse)

# Same six rows as the question. The L3/L4 stint in game 12345 appears twice
# on purpose: the expected totals (L3 = 6, L4 = -6) depend on both rows.
dat <- tribble(
  ~game_id, ~home_lineup, ~awaylineup, ~home_Plusminus, ~Away_Plusminus, ~home_team, ~away_team,
  12345, "L1", "L2", -2,  2, "BOS", "ATL",
  12345, "L3", "L4",  3, -3, "BOS", "ATL",
  12345, "L3", "L4",  3, -3, "BOS", "ATL",
  45678, "L2", "L1",  3, -3, "ATL", "BOS",
  45678, "L2", "L7",  1, -1, "ATL", "BOS",
  45678, "L8", "L1",  3, -3, "ATL", "BOS"
)

# Reshape to one row per (stint, side) with columns team / lineup / plus_minus.
long <-
  dat %>%
  # Stack the two team columns; `where` records which side each row describes.
  gather(where, team, home_team:away_team) %>%
  # Keep each side's lineup and plus/minus only on that side's rows; the other
  # side's values become NA so drop_na() can discard the mismatches later.
  mutate(
    home_lineup = case_when(where == "home_team" ~ home_lineup,
                            TRUE ~ NA_character_),
    away_lineup = case_when(where == "away_team" ~ awaylineup,
                            TRUE ~ NA_character_),
    home_plusminus = case_when(where == "home_team" ~ home_Plusminus,
                               TRUE ~ NA_real_),
    away_plusminus = case_when(where == "away_team" ~ Away_Plusminus,
                               TRUE ~ NA_real_)
  ) %>%
  select(-home_Plusminus, -Away_Plusminus, -awaylineup) %>%
  # Name the gathered columns explicitly instead of relying on a positional
  # range that happens to span `where` and `team`.
  gather(plus_minus_type, plus_minus, home_plusminus, away_plusminus) %>%
  gather(lineup_type, lineup, home_lineup, away_lineup) %>%
  mutate(
    where = where %>% str_remove("_team"),
    lineup_type = lineup_type %>% str_remove("_lineup"),
    # Derive the label from plus_minus_type itself (not lineup_type), using the
    # lower-case suffix that the gathered column names actually carry.
    plus_minus_type = plus_minus_type %>% str_remove("_plusminus")
  ) %>%
  drop_na()

# Total plus/minus per team and lineup. The per-game figure divides by the
# number of distinct games, not the number of rows, because a lineup can have
# several stints within one game.
per_lineup <- long %>%
  group_by(team, lineup) %>%
  summarise(
    PlusMinus = sum(plus_minus),
    Pergame = PlusMinus / n_distinct(game_id)
  ) %>%
  ungroup()
per_lineup
#> # A tibble: 6 x 4
#>   team  lineup PlusMinus Pergame
#>   <chr> <chr>      <dbl>   <dbl>
#> 1 ATL   L2             6       3
#> 2 ATL   L4            -6      -6
#> 3 ATL   L8             3       3
#> 4 BOS   L1            -8      -4
#> 5 BOS   L3             6       6
#> 6 BOS   L7            -1      -1
由reprex package(v0.2.1)于2018-10-26创建