数据:
# Nominal roll in wide form: one row per grade/program, one column per school
# year ("1991-92" ... "2017-18"). NA entries appear only in the MSP row.
# Row labels must match the factor levels used when the data is tidied
# ("Gr. 7" and "Sr. 4"); a mismatched label becomes NA in the Grade factor.
nominal_roll1 <- tribble(
  ~"Grade", ~"1991-92", ~"1992-93", ~"1993-94", ~"1994-95", ~"1995-96",
  ~"1996-97", ~"1997-98", ~"1998-99", ~"1999-00", ~"2000-01", ~"2001-02",
  ~"2002-03", ~"2003-04", ~"2004-05", ~"2005-06", ~"2006-07", ~"2007-08",
  ~"2008-09", ~"2009-10", ~"2010-11", ~"2011-12", ~"2012-13", ~"2013-14",
  ~"2014-15", ~"2015-16", ~"2016-17", ~"2017-18",
  "K4", 88,92,99,101,90,99,103,111,95,92,84,92,107,86,93,82,98,92,96,121,154,137,137,145,155,160,160,
  "K5", 87,89,88,102,107,94,102,106,111,102,98,88,72,89,84,108,82,115,98,93,121,154,137,137,145,155,160,
  "Gr. 1", 107,102,105,104,122,114,119,134,111,125,120,113,118,121,104,109,103,113,135,88,93,121,154,137,137,137,155,
  "Gr. 2", 90,113,100,109,99,118,102,105,130,104,132,128,114,108,97,99,109,98,97,87,88,93,121,154,137,137,137,
  "Gr. 3", 81,86,102,102,112,108,119,103,112,121,105,121,107,113,90,101,93,101,102,97,87,88,93,121,154,154,137,
  "Gr. 4", 67,84,86,91,88,105,111,113,94,114,122,127,138,109,92,92,99,89,98,90,97,87,88,93,121,121,154,
  "Gr. 5", 67,76,84,94,96,97,117,112,119,109,106,104,121,145,100,102,90,103,94,98,90,97,87,88,93,93,121,
  "Gr. 6", 66,76,74,83,92,95,81,113,105,102,106,106,100,115,120,107,101,89,106,127,98,90,97,87,88,88,93,
  "Gr. 7", 81,77,86,85,88,88,112,96,113,110,120,111,120,121,94,126,103,110,93,83,127,98,90,97,87,87,88,
  "Gr. 8", 59,76,71,68,84,74,48,85,94,85,102,124,131,111,84,113,123,104,111,88,83,127,98,90,97,97,87,
  "Sr. 1", 62,62,64,89,77,73,90,82,104,122,120,106,103,177,138,149,152,174,184,88,111,83,127,98,90,90,97,
  "Sr. 2", 55,78,62,68,62,76,71,131,69,85,130,132,113,141,91,175,125,159,182,182,184,111,83,127,98,98,90,
  "Sr. 3", 3,71,60,51,66,44,53,97,75,59,82,143,136,136,76,108,144,126,98,98,182,184,88,83,127,127,98,
  "Sr. 4", 0,66,65,32,49,67,83,56,77,45,79,68,182,160,69,121,97,127,157,157,98,182,59,88,83,83,127,
  "MSP", 0,1,1,1,0,0,0,0,0,0,16,20,41,10,22,36,42,38,51,NA,NA,NA,20,NA,NA,NA,NA)
# Long ("tidy") version of the nominal roll: one row per grade and school year.
# Built inside local() so the intermediate objects do not leak into the
# workspace.
nominal_tidy1 <- local({
  # Order of the Grade factor levels.
  grade_order <- c(
    "K4", "K5", "Gr. 1", "Gr. 2", "Gr. 3", "Gr. 4", "Gr. 5", "Gr. 6", "Gr. 7",
    "Gr. 8", "Sr. 1", "Sr. 2", "Sr. 3", "Sr. 4", "MSP"
  )

  # Two random placeholder columns, drawn once per row of the wide table
  # (i.e. once per grade). No seed is set, so values differ between runs.
  with_rates <- mutate(
    nominal_roll1,
    FakeCrudeBirthRate = rnorm(nrow(nominal_roll1), mean = 12.5, sd = .5),
    FakeFertilityRate = rnorm(nrow(nominal_roll1), mean = 2.2, sd = .05)
  )

  # Stack the school-year columns into Year / Attendance pairs; the other
  # columns are repeated for every year.
  long_roll <- gather(with_rates, Year, Attendance, `1991-92`:`2017-18`)

  # Year_ is the numeric starting year: the first four characters of the
  # school-year label (e.g. "1991-92" -> 1991).
  mutate(
    long_roll,
    Year_ = as.numeric(str_trunc(Year, width = 4, side = "right", ellipsis = "")),
    Grade = factor(Grade, levels = grade_order)
  )
})
要计算同比变化,我可以使用 lag():
# Year-over-year change in total attendance across all grades.
#   sum       - total attendance for the year; NA if any Attendance value in
#               that year is NA, because sum() is called without na.rm
#   diff      - change from the previous year's total (NA for the first year)
#   perc_diff - that change as a percentage of the current year's total
nominal_tidy1 %>%
  group_by(Year_) %>%
  summarise(sum = sum(Attendance)) %>%
  mutate(diff = sum - lag(sum)) %>%
  mutate(perc_diff = diff / sum * 100)
Year_ sum diff perc_diff
<dbl> <dbl> <dbl> <dbl>
1 1991 913 NA NA
2 1992 1149 236 20.5
3 1993 1147 -2 -0.174
4 1994 1180 33 2.80
5 1995 1232 52 4.22
6 1996 1252 20 1.60
7 1997 1311 59 4.50
8 1998 1444 133 9.21
9 1999 1409 -35 -2.48
10 2000 1375 -34 -2.47
但是,当我想按组(年级)计算变化时,使用 arrange() 并不能重新排序,从而按年级逐年比较变化,例如将第 1 年的 1 年级与第 2 年的 1 年级进行比较。
# Attempt at a per-grade year-over-year change (this is the version that gives
# the wrong comparison).
# Year_ is the first grouping variable, so after summarise() drops the last
# one (Grade) the result is still grouped by Year_. lag() therefore steps from
# one grade to the next within the same year, not from year to year within a
# grade. The arrange() call does not change this, because summarise() returns
# its rows ordered by the grouping keys.
nominal_tidy1 %>%
  mutate(Grade = as.character(Grade)) %>%
  group_by(Year_, Grade) %>%
  arrange(desc(Grade)) %>%
  summarise(sum = sum(Attendance)) %>%
  mutate(diff = sum - lag(sum)) %>%
  mutate(perc_diff = diff / sum * 100)
这个结果是不正确的;即使它是正确的,也不是很可靠,因为它依赖于 arrange() 的排序。
我不确定是 lag() 用错了,还是应该直接基于原始数据框(而不是整理后的长格式数据)来计算,或者另有原因。谢谢您的帮助。需要指出的是,我发现另一个问题(this question)非常相似,但就本分析的需求而言,那里的回答似乎过于复杂。
答案 0 :(得分:1)
在这种情况下,需要交换 group_by() 的参数顺序(先 Grade,后 Year_),并省略对年级的 arrange()。
NA
结果
# Year-over-year attendance change within each grade.
# Grade is the first grouping variable, so summarise() drops Year_ and leaves
# the result grouped by Grade; lag() then compares each year with the previous
# year of the same grade. summarise() returns rows ordered by the grouping
# keys (Grade, then Year_), so no arrange() step is needed.
#   sum       - attendance for the grade in that year (NA if Attendance is NA)
#   diff      - change from the previous year of the same grade
#   perc_diff - that change as a percentage of the current year's value
nominal_tidy1 %>%
  mutate(Grade = as.character(Grade)) %>%
  group_by(Grade, Year_) %>%
  summarise(sum = sum(Attendance)) %>%
  mutate(diff = sum - lag(sum),
         perc_diff = diff / sum * 100) %>%
  # Drop every row containing an NA, e.g. the first year of each grade, which
  # has no previous year to compare with.
  na.omit()