我有一个带有ID列和几列的数据框(每个月一个)。每次运行程序时,月份列数都会有所不同。我正在编写R代码以将月份列名称提取到向量,并且对于向量的每个元素,我尝试按ID变量进行分组并获取每个组的总和。以下是我使用的代码。一切正常,期望结果是总和而不是组和。非常感谢任何帮助。
Current pattern of result:
newdf for a given month:
ID summ_mon_2017_12
1 20
2 20
3 20
Expected pattern of result:
newdf for a given month:
ID summ_mon_2017_12
1 8
2 5
3 7
library(dplyr)
ID <- c(1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3)
mon_2017_12<-c(rep(rnorm(14)))
mon_2018_01<-c(rep(rnorm(14)))
mon_2018_02<-c(rep(rnorm(14)))
mon_2018_03<-c(rep(rnorm(14)))
mon_2018_04<-c(rep(rnorm(14)))
mon_2018_05<-c(rep(rnorm(14)))
groupsum<-data.frame(ID,mon_2017_12,mon_2018_01,mon_2018_02,mon_2018_03,mon_2018_04,mon_2018_05)
#extract month columns
month_vec <- as.vector(unique(substring(names(groupsum %>%
select(contains("mon_"))),1,11)))
addsummvar<-function(df, n1){
newvar<-paste("summ",n1,sep="_")
newdf<-paste("summ",n1,sep="_")
print(newvar)
print(newdf)
newdf<- df %>%
group_by(ID) %>%
summarise(
!!newvar := sum(df[n1])
)
print(newdf)
}
for(i in 1:length(month_vec)) {
summ_del <- addsummvar(df=groupsum, n1=month_vec[i])
}
答案 0 :(得分:1)
这个单行不能帮助你的目的吗?
library(dplyr)
ID <- c(1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3)
mon_2017_12<-c(rep(rnorm(14)))
mon_2018_01<-c(rep(rnorm(14)))
mon_2018_02<-c(rep(rnorm(14)))
mon_2018_03<-c(rep(rnorm(14)))
mon_2018_04<-c(rep(rnorm(14)))
mon_2018_05<-c(rep(rnorm(14)))
groupsum<-data.frame(ID,mon_2017_12,mon_2018_01,mon_2018_02,mon_2018_03,mon_2018_04,mon_2018_05)
groupsum %>% group_by(ID) %>% summarise_all(.funs = sum)
输出:
> groupsum %>% group_by(ID) %>% summarise_all(.funs = sum)
# A tibble: 3 x 7
ID mon_2017_12 mon_2018_01 mon_2018_02 mon_2018_03 mon_2018_04 mon_2018_05
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1.00 -0.00419 0.230 2.87 -1.38 -2.63 2.12
2 2.00 -1.12 1.45 -1.08 -0.907 -0.966 2.86
3 3.00 -2.11 3.49 0.282 -1.35 6.30 -3.60
更新了使用summarise_if
过滤掉非数字列的代码:
library(dplyr)
set.seed(123)
ID <- c(1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3)
mon_2017_12<-c(rep(rnorm(14)))
mon_2018_01<-c(rep(rnorm(14)))
mon_2018_02<-c(rep(rnorm(14)))
mon_2018_03<-c(rep(rnorm(14)))
mon_2018_04<-c(rep(rnorm(14)))
mon_2018_05<-c(rep(rnorm(14)))
groupsum<-data.frame(ID,mon_2017_12,mon_2018_01,mon_2018_02,mon_2018_03,mon_2018_04,mon_2018_05)
groupsum$var2 <- c(letters[1:14])
head(groupsum)
groupsum %>% group_by(ID) %>% summarise_if(is.numeric,sum)