# Example data: one row per student and year. The WANT* columns hold the
# expected result that the question is asking how to compute.
data <- local({
  # Number of yearly records per student (student 2 has no 2017 row).
  rows_per_student <- c(4, 3, 4, 4)

  # Expand one value per student to one value per row.
  per_student <- function(values) rep(values, times = rows_per_student)

  data.frame(
    student = per_student(c(1, 2, 3, 4)),
    year = c(
      2014, 2015, 2016, 2017,
      2014, 2015, 2016,
      2014, 2015, 2016, 2017,
      2014, 2015, 2016, 2017
    ),
    grade = c(
      9, 10, 11, 12,
      9, 10, 11,
      9, 10, 11, 12,
      9, 10, 11, 12
    ),
    scoreA = c(
      0, 0, 0, 0,
      0, 0, 2,
      0, 1, 1, 2,
      0, 0, 1, 1
    ),
    scoreB = c(
      0, 0, 0, 0,
      0, 0, 2,
      0, 0, 1, 2,
      0, 0, 0, 1
    ),
    WANTgradeA = per_student(c(12, 11, 10, 11)),
    WANTscoreA = per_student(c(0, 2, 1, 1)),
    WANTgradeB = per_student(c(12, 11, 11, 12)),
    WANTscoreB = per_student(c(0, 2, 1, 1))
  )
})
这是我的数据。我的预期输出是所有以“WANT”开头的变量。
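当scoreA首次大于0时,WANTgradeA等于该行的grade,WANTscoreA等于该行的scoreA;如果scoreA从未超过0,则WANTgradeA等于grade的最大值,WANTscoreA等于scoreA的最大值(即0)。
scoreB同理:当scoreB首次大于0时,WANTgradeB和WANTscoreB分别取该行的grade和scoreB;如果scoreB从未超过0,则WANTgradeB等于grade的最大值,WANTscoreB等于0。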
我的尝试未能添加这4个新变量,反而删掉了部分数据行:
# Asker's original attempt, kept exactly as posted; it does not produce the
# WANT* columns.
# NOTE(review): `value` and `score` are not columns of `data` as built above
# (the columns are scoreA / scoreB), so this presumably fails as written --
# confirm against the asker's real data.
data %>%
group_by(student) %>%
# filter() only keeps or drops rows, so it cannot add new columns.
# max(score > 0) is the maximum of a logical vector (0 or 1), not a score.
filter(value == max(score > 0))
答案 0 :(得分:1)
这是一个dplyr解决方案:
library(dplyr)

# Return the element of `values` at the first position where `score` is
# strictly positive; return `default` when no element of `score` is positive.
#
# match(TRUE, ...) skips NA comparisons, so an NA score before the first
# positive one does not turn the result into NA.
#
# @param score   Numeric vector that is scanned for its first value > 0.
# @param values  Vector of the same length as `score` to pick the result from.
# @param default Value returned when `score` never exceeds 0.
# @return A length-one value: `values[[i]]` for the first hit, else `default`.
value_at_first_positive <- function(score, values, default) {
  first_hit <- match(TRUE, score > 0)
  if (is.na(first_hit)) default else values[[first_hit]]
}

# For every student, add four columns that are constant within the student:
#   wgradeA / wgradeB: grade in the first row where scoreA / scoreB is > 0,
#                      or the student's highest grade if that never happens.
#   wscoreA / wscoreB: the score in that same first row, or the student's
#                      first score if the score never exceeds 0.
# The pipeline starts from `data` (the data frame built at the top of the
# file); the bare name `df` would resolve to the stats::df() function.
data %>%
  group_by(student) %>%
  mutate(
    wgradeA = value_at_first_positive(scoreA, grade, max(grade)),
    wscoreA = value_at_first_positive(scoreA, scoreA, scoreA[1]),
    wgradeB = value_at_first_positive(scoreB, grade, max(grade)),
    wscoreB = value_at_first_positive(scoreB, scoreB, scoreB[1])
  ) %>%
  ungroup()
# A tibble: 15 x 13
student year grade scoreA scoreB WANTgradeA WANTscoreA WANTgradeB WANTscoreB wgradeA wscoreA wgradeB wscoreB
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 2014 9 0 0 12 0 12 0 12 0 12 0
2 1 2015 10 0 0 12 0 12 0 12 0 12 0
3 1 2016 11 0 0 12 0 12 0 12 0 12 0
4 1 2017 12 0 0 12 0 12 0 12 0 12 0
5 2 2014 9 0 0 11 2 11 2 11 2 11 2
6 2 2015 10 0 0 11 2 11 2 11 2 11 2
7 2 2016 11 2 2 11 2 11 2 11 2 11 2
8 3 2014 9 0 0 10 1 11 1 10 1 11 1
9 3 2015 10 1 0 10 1 11 1 10 1 11 1
10 3 2016 11 1 1 10 1 11 1 10 1 11 1
11 3 2017 12 2 2 10 1 11 1 10 1 11 1
12 4 2014 9 0 0 11 1 12 1 11 1 12 1
13 4 2015 10 0 0 11 1 12 1 11 1 12 1
14 4 2016 11 1 0 11 1 12 1 11 1 12 1
15 4 2017 12 1 1 11 1 12 1 11 1 12 1