我创建了一种算法来获取子集列表的摘要。
这是我的数据框:
df <- data.frame(
Name = c("asdf", "kjhgf", "cvbnm", "rtyui", "cvbnm", "jhfd", "cvbnm", "sdfghj", "cvbnm", "dfghj", "cvbnm"),
sale = c(27,NA, 27, 16, 14,NA, 14, 14,NA, 18, 28),
city = c("CA", "TX", "MN", "NY", "TX", "MT", "HU", "KL", "TX", "SA", "TX"),
Dept = c("HH", "MM", "NN", "MM", "AA", "VV", "MM", "HU", "JJ", "MM", "ZZ")
)
现在,我根据需要创建了一些子集:
df1<- df
df$cc1<-1
#Astellas
df2<- subset(df, Dept == 'MM')
df$cc2<-ifelse(df$Dept == 'MM',1,NA)
lst<-list(df$cc1, df$cc2)
ldat<-list("ALL" = df1, "MM" =df2)
使用以下算法,我能够获得所需的摘要输出,但是我想创建一个执行相同功能的函数。请帮助找出如何创建自定义函数,以得到与以下脚本相同的结果。
df %>%
select(-Name, -city) %>%
group_by(Dept) -> dat
N <- length(dat[[1]])
Median <- median(dat[[1]])
Average <- mean(dat[[1]])
q25 <- quantile(dat[[1]])[2]
q75 <- quantile(dat[[1]])[4]
cbind(q25, Median, Average, q75, N) -> ALL
dat %>% filter(Dept == "MM") -> MM
N <- length(MM[[1]])
Median <- median(MM[[1]])
Average <- mean(MM[[1]])
q25 <- quantile(MM[[1]])[2]
q75 <- quantile(MM[[1]])[4]
cbind(q25, Median, Average, q75, N) -> MM
as.data.frame(rbind(ALL, MM)) %>%
`rownames<-`(., c("ALL", "MM")) %>%
pander::pander() %>% as.data.frame()
我需要将上面的脚本转换成这样的函数:
functiont(data=ldat,var = "sale", name_of_var = c("ALL","MM"))
对于变量列表,该函数应该是动态的。在这种情况下,我们有两个变量c("ALL" ,"MM")
。所以这应该是动态的。
输出摘要应为flextable
,如下所示:
答案 0 :(得分:1)
检查代码后,便可以使用所需功能。本解决方案中的函数采用所需的参数,但我进行了一些修改以仅使用一个数据帧,这是主要的df
。原因是ALL
的汇总将始终来自第一个大数据框,因此不需要为全局数据框创建列表,然后为过滤器创建其他对象。该函数产生所需的内容,并且可以接受您提到的任何过滤器。下面是该函数的代码和一些应用程序:
library(dplyr)
#Function
myfun <- function(data,var, name_of_var)
{
#Data
dat <- data
#Compute for all items
N <- length(dat[[var]])
Median <- median(dat[[var]])
Average <- mean(dat[[var]])
q25 <- quantile(dat[[var]])[2]
q75 <- quantile(dat[[var]])[4]
as.data.frame(cbind(q25, Median, Average, q75, N)) -> ALL
rownames(ALL) <- 'All'
#Now the filter values
dat %>% filter(Dept %in% name_of_var) -> MM
#Create list
List <- split(MM,MM$Dept)
#Apply summaries in a function
compute <- function(x)
{
N <- length(x[[var]])
Median <- median(x[[var]])
Average <- mean(x[[var]])
q25 <- quantile(x[[var]])[2]
q75 <- quantile(x[[var]])[4]
as.data.frame(cbind(q25, Median, Average, q75, N)) -> y
rownames(y) <- unique(x$Dept)
return(y)
}
#Apply
List2 <- do.call(rbind,lapply(List,compute))
#Bind all
Binded <- rbind(ALL,List2)
return(Binded)
}
现在,一些应用程序:
#Apply function 1
Ex1 <- myfun(data=df,var = "sale", name_of_var = c("MM"))
输出:
q25 Median Average q75 N
All 15.0 19 20.90909 27.0 11
MM 15.5 17 19.00000 20.5 4
示例2:
#Apply function 2
Ex2 <- myfun(data=df,var = "sale", name_of_var = c("MM","HH"))
输出:
q25 Median Average q75 N
All 15.0 19 20.90909 27.0 11
HH 27.0 27 27.00000 27.0 1
MM 15.5 17 19.00000 20.5 4
之后,您可以按任何样式设置输出的格式。
答案 1 :(得分:0)
df <- data.frame(Name = c("asdf","kjhgf","cvbnm","rtyui","cvbnm","jhfd","cvbnm","sdfghj","cvbnm","dfghj","cvbnm"),
sale=c(27,28,27,16,14,25,14,14,19,18,28),
city=c("CA","TX","MN","NY","TX","MT","HU","KL","TX","SA","TX"),
Dept = c("HH","MM","NN","MM","AA","VV","MM","HU","JJ","MM","ZZ"))
df1<- df
df$cc1<-1
df2<- subset(df, Dept == 'MM')
df$cc2<-ifelse(df$Dept == 'MM',1,NA)
lst<-list(df$cc1, df$cc2)
listd<-list("ALL" = df1, "MM" =df2)
#I want to run my function for listd so that i can get a combined summary for all variables in listd
tt2<-function(data,var,footer,Name_of_variable,decimal){
for (d in 1:length(data)) {
cat('\n\n#### ', names(data)[d], '\n\n')
md<-data[[d]]
table_list<-list()
for (i in 1:length(d))
table_list[[i]]<-t1(md,var,footer,decimal,Name_of_variable)
tt<- do.call(rbind,table_list)
}
cat(knit_print(tt))
cat('\n\n')
}
t1<-function(dataset,var,Suff,decimal,Name_of_variable){
numdig <- if (decimal == TRUE) {1} else {0}
var <- rlang::parse_expr(var)
summ_tab1<- dataset %>% filter(!is.na(!!var)) %>% summarise(
q25 = format(round(quantile(!! var, type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[2],digits = numdig),nsmall = numdig),
Median = format(round(quantile(!! var, type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[3],digits = numdig),nsmall = numdig),
Average = format(round( mean(!! var, na.rm=TRUE),digits = numdig),nsmall = numdig),
q75 = format(round(quantile(!! var, type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[4],digits = numdig) ,nsmall = numdig),
N = sum(!is.na(!!var)))
summ_tab<-summ_tab1 %>%
mutate(" "=!!Name_of_variable,
q25 = q25,
Median =Median,
Average =Average,
q75 = q75)%>%
dplyr::rename(
`25th percentile` = q25,
`75th percentile` = q75)%>%select(" ",N,everything())
summ_tab1
}
tt2(data = listd,var = "sale",Name_of_variable = "listd",decimal = TRUE)