我有多个变量的数据框,一些变量只包含0和1,其他列包含所有可能的值。
如何在不指定列名的情况下对 df 进行汇总:仅包含 0 和 1 的列按 “sts_1 = sum(sts_1 * 0.25, na.rm = T)” 的方式计算,其他列按 “non_sts_3 = mean(non_sts_3, na.rm = T)” 的方式计算?
# Sample data: two grouping keys (year, month_), two indicator columns that
# hold only 0/1 (sts_2 also has a missing value), and three general-valued
# columns (non_sts_2 has a missing value).
df <- data.frame(
  year      = rep(c("2014", "2015"), times = c(2, 3)),
  month_    = c(rep("Jan", 4), "Feb"),
  sts_1     = c(0, 1, 1, 1, 0),
  sts_2     = c(1, 0, 0, 1, NA),
  non_sts_1 = c(0, 3, 7, 31, 10),
  non_sts_2 = c(1, 4, NA, 12, 6),
  non_sts_3 = c(12, 14, 18, 1, 9)
)
通过手动输入列名,我们可以用 dplyr 按如下代码实现:
# Group by the (year, month_) keys; `df` is rebound to the grouped data so
# the summaries below are computed once per key combination.
df <- group_by(df, year, month_)

# Hand-written aggregation: every column is listed with the summary it needs.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
df_aggregation <- summarise(
  df,
  # General-valued columns: mean of the non-missing values.
  non_sts_1 = mean(non_sts_1, na.rm = TRUE),
  non_sts_2 = mean(non_sts_2, na.rm = TRUE),
  non_sts_3 = mean(non_sts_3, na.rm = TRUE),
  # 0/1 columns: each non-missing value is weighted by 0.25 and summed.
  sts_1 = sum(sts_1 * 0.25, na.rm = TRUE),
  sts_2 = sum(sts_2 * 0.25, na.rm = TRUE)
)
提前致谢...
答案 0 :(得分:5)
@akrun 的答案简单直接。但是,如果您不想进行不必要的计算,可以定义一个函数,让它直接区分这两类列:
library(dplyr)
# Summarise one numeric vector, choosing the summary from its values.
#
# x:     numeric vector (one column, or one group of a column when called
#        from a grouped summarise).
# na.rm: forwarded to sum() / mean(); drop missing values when TRUE.
#
# Returns a single number: sum(x * 0.25) when every element of x is 0, 1 or
# NA, otherwise mean(x).
#
# NA is accepted as an indicator value on purpose: `%in%` never matches NA
# against 0:1, so a 0/1 vector with a missing entry would otherwise fall
# through to mean() (and an all-NA group would yield NaN instead of 0).
#
# NOTE(review): the decision is made per vector, so a group of a
# general-valued column that happens to contain only 0, 1 or NA is summed
# rather than averaged.
mysumm <- function(x, na.rm = FALSE) {
  if (all(x %in% c(0, 1, NA))) {
    sum(x * 0.25, na.rm = na.rm)
  } else {
    mean(x, na.rm = na.rm)
  }
}

# Apply the value-dependent summary to every numeric column within each
# (year, month_) group; na.rm = TRUE is forwarded to mysumm().
df %>%
  group_by(year, month_) %>%
  summarise_if(is.numeric, mysumm, na.rm = TRUE)
# # A tibble: 3 x 7
# # Groups: year [?]
# year month_ sts_1 sts_2 non_sts_1 non_sts_2 non_sts_3
# <fctr> <fctr> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 2014 Jan 0.25 0.25 1.5 2.5 13.0
# 2 2015 Feb 0.00 0.00 10.0 6.0 9.0
# 3 2015 Jan 0.50 0.25 19.0 12.0 9.5
答案 1 :(得分:4)
我们可以使用 `summarise_all` 对每一列同时计算两种汇总,然后删除多余的列。
如果我们要在不同的列集上应用不同的函数组,另一种方法是分别在各个列块上应用对应的函数,然后把结果连接起来(见下文的 map2 示例)。
# Compute both candidate summaries for every non-grouping column, then keep
# only the one that applies to each column family: the "_mean" results for
# non_sts_* columns and the "_sum" results for sts_* columns.
df %>%
  group_by(year, month_) %>%
  summarise_all(
    funs(
      mean = mean(., na.rm = TRUE),
      sum = sum(. * 0.25, na.rm = TRUE)
    )
  ) %>%
  select(matches("month_|non_sts.*mean|\\bsts.*sum"))
# A tibble: 3 x 7
# Groups: year [2]
# year month_ non_sts_1_mean non_sts_2_mean non_sts_3_mean sts_1_sum sts_2_sum
# <fctr> <fctr> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 2014 Jan 1.5 2.5 13.0 0.25 0.25
#2 2015 Feb 10.0 6.0 9.0 0.00 0.00
#3 2015 Jan 19.0 12.0 9.5 0.50 0.25
注意:此方法可灵活用于任何一组列
如果我们想针对只包含 0 和 1 的列来调整上述方法:
library(tidyverse)
# One summary function per column family, in the same order as the name
# patterns in `nm1`.
flist <- list(
  function(x) mean(x, na.rm = TRUE),
  function(x) sum(x * 0.25, na.rm = TRUE)
)

# Regular expressions selecting each column family by its name prefix.
nm1 <- c("^non_sts", "^sts")

# Summarise each family separately (.x = name pattern, .y = summary
# function), then join the per-family results on the grouping keys.
# The function is passed to summarise_at() directly; wrapping it in funs()
# is unnecessary and depends on the quoted symbol `.y` being looked up in
# the lambda's environment.
map2(nm1, flist, ~ df %>%
  group_by(year, month_) %>%
  summarise_at(vars(matches(.x)), .y)) %>%
  reduce(inner_join, by = c("year", "month_"))
# A tibble: 3 x 7
# Groups: year [?]
# year month_ non_sts_1 non_sts_2 non_sts_3 sts_1 sts_2
# <fctr> <fctr> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 2014 Jan 1.5 2.5 13.0 0.25 0.25
#2 2015 Feb 10.0 6.0 9.0 0.00 0.00
#3 2015 Jan 19.0 12.0 9.5 0.50 0.25
# NOTE(review): in the page text the prose fragment "then remove" was fused
# onto the start of the next statement; it is not part of the variable name.
# Without the separation `l1` is never defined and the split() below fails.

# For each value column (positions 3 to 7), flag whether it contains only
# 0, 1 or NA; unlist() turns the one-row result into a named logical vector.
l1 <- df %>%
  summarise_at(3:7, funs(all(. %in% c(0, 1, NA)))) %>%
  unlist()

# Split the value-column names (everything except the first two columns)
# into two groups keyed by that flag ("FALSE" / "TRUE").
nm1 <- split(names(df)[-(1:2)], l1)