我是 R 的新手,正在尝试为这个数据框生成一张汇总统计表,其中除了各列的基本统计值之外,还包含范围(range)、频数(frequency)和众数(mode)。
这是我目前的代码。我尝试了各种 R 包,但还没有找到一个能提供我所需全部统计量的:
# Plain data.frame copy of the raw data.
children_allergy_local_df <- data.frame(children_allergy_local)

# Keep only the columns of interest.
child_data <- select(
  children_allergy_local,
  childsID, gender, family_allergy, birth_order, birth_weight,
  breastfeeding, house_sqm, pets, smoke, IgE
)
child_data_df <- data.frame(child_data)

# Print the built-in summary, first as a table and then as a data frame.
summary(child_data)
as.data.frame(summary(child_data))

# One summary() vector per column, bound side by side into a matrix.
child_data_summary <- do.call(cbind, lapply(child_data, summary))
child_data_summary_df <- data.frame(child_data_summary)

# Drop rows 2 and 5 (for numeric columns summary() reports the 1st and 3rd
# quartiles in those positions), then print what is left.
child_data_summary_df <- child_data_summary_df[c(-2, -5), ]
child_data_summary_df
给了我
col1 col2 col3 col 4 etc.....
min val val val
median val val val
mean val val val
max val val val
我的目标是
col1 col2 col3 col 4 etc.....
min val val val
median val val val
mean val val val
max val val val
range val val val
frequency val val val
mode val val val
有没有办法创建我想要的这些行?我在网上似乎找不到任何相关内容,完全卡住了。range() 似乎给了我 2 个值,而不是我需要的 1 个值(最大值 - 最小值)。
答案 0 :(得分:1)
library(dplyr)

# Statistical mode: the most frequent non-missing value of `x` (ties go to the
# value that occurs first). base::mode() is deliberately not used here because
# it returns the storage mode of the object (e.g. "numeric").
stat_mode <- function(x) {
  x <- x[!is.na(x)]
  if (length(x) == 0L) {
    return(NA_real_)
  }
  ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
}

# One row per variable, one column per statistic. vapply() works column by
# column; apply() would first coerce the whole data frame to a matrix.
# NOTE(review): assumes every column of child_data_df is numeric -- confirm.
stats_by_var <- data.frame(
  min = vapply(child_data_df, min, numeric(1), na.rm = TRUE),
  median = vapply(child_data_df, median, numeric(1), na.rm = TRUE),
  mean = vapply(child_data_df, mean, numeric(1), na.rm = TRUE),
  max = vapply(child_data_df, max, numeric(1), na.rm = TRUE),
  freq = vapply(child_data_df, function(x) sum(!is.na(x)), numeric(1)),
  mode = vapply(child_data_df, stat_mode, numeric(1))
) %>%
  mutate(range = max - min) # a single value per variable: max - min

# Transpose so that statistics become rows and variables become columns.
# (The result is named `summary` as in the original answer; calls to the
# summary() function still work because R skips non-function bindings when
# looking up a function.)
summary <- as.data.frame(t(stats_by_var))
names(summary) <- names(child_data_df) # restore the variable names if dropped
答案 1 :(得分:1)
您可以单独创建附加值的矩阵并将它们绑定在一起。这可以随意扩展。
示例:
library(car)
Duncan2 <- Duncan[-1] # drop the first column of Duncan

# Statistical mode: the most frequent non-missing value of `x` (ties go to the
# value that occurs first). base::mode() returns the storage mode instead
# (e.g. "numeric"), which is not what the summary table needs.
stat_mode <- function(x) {
  x <- x[!is.na(x)]
  if (length(x) == 0L) {
    return(NA_real_)
  }
  ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
}

# Rows of summary() without positions 2 and 5, rounded to two decimals.
a <- round(do.call(cbind, lapply(Duncan2, summary))[-c(2, 5), ], 2)

# Extra rows, one 3 x 1 column per variable.
b <- do.call(cbind, lapply(Duncan2, function(x) {
  mat <- matrix(NA_real_, ncol = 3,
                dimnames = list(NULL, c("Range", "Freq.", "Mode")))
  mat[, 1] <- diff(range(x)) # max - min as a single value
  mat[, 2] <- length(x)      # number of observations; frequency() is the
                             # time-series sampling frequency (1 here)
  mat[, 3] <- stat_mode(x)   # most frequent value, keeps the matrix numeric
  t(mat)
}))
c <- as.data.frame(rbind(a, b))
# NOTE: the sample output printed after this snippet was produced with
# frequency() and mode(); with the corrected code the "Freq." row shows the
# number of observations and the "Mode" row the most frequent value, and all
# rows are numeric.
c
# income education prestige
# Min. 7 7 3
# Median 42 45 41
# Mean 41.87 52.56 47.69
# Max. 81 100 97
# Range 74 93 94
# Freq. 1 1 1
# Mode numeric numeric numeric
希望它会有所帮助。
**编辑:** 你可以轻松地将它包装成一个函数。
# Build a summary table for the columns of a data frame.
#
# z: a data frame (or list) of numeric columns.
#
# Returns a data frame with one column per column of `z` and the rows
# "Min.", "Median", "Mean", "Max.", "Range", "Freq." and "Mode":
#   - Min./Median/Mean/Max. are rounded to two decimals,
#   - Range is max - min as a single value,
#   - Freq. is the number of non-missing observations,
#   - Mode is the most frequent non-missing value (ties go to the value that
#     occurs first).
# Missing values are ignored in every statistic.
#
# Earlier versions filled "Freq." with frequency(x) (the time-series sampling
# frequency, 1 for plain vectors) and "Mode" with mode(x) (the storage mode,
# e.g. "numeric"), which also turned every column of the result into
# character. Sample output produced by that version no longer matches.
myCustomSum <- function(z) {
  stat_mode <- function(x) {
    x <- x[!is.na(x)]
    if (length(x) == 0L) {
      return(NA_real_)
    }
    ux <- unique(x)
    ux[which.max(tabulate(match(x, ux)))]
  }

  # Statistics are computed by name rather than by dropping positions 2 and 5
  # of summary(), whose length changes when a column contains NA.
  one_column <- function(x) {
    c(
      round(
        c(
          "Min." = min(x, na.rm = TRUE),
          "Median" = median(x, na.rm = TRUE),
          "Mean" = mean(x, na.rm = TRUE),
          "Max." = max(x, na.rm = TRUE)
        ),
        2
      ),
      "Range" = diff(range(x, na.rm = TRUE)),
      "Freq." = sum(!is.na(x)),
      "Mode" = stat_mode(x)
    )
  }

  as.data.frame(vapply(z, one_column, numeric(7)))
}
myCustomSum(Duncan2)
# income education prestige
# Min. 7 7 3
# Median 42 45 41
# Mean 41.87 52.56 47.69
# Max. 81 100 97
# Range 74 93 94
# Freq. 1 1 1
# Mode numeric numeric numeric