百分比摘要

时间:2014-07-02 18:53:55

标签: r summary

我有一个样本数据集,如下所示。我可以从这里轻松得到摘要。

# Sample data: three categorical columns (Occ, Type, Alc) stored as factors
# and a numeric Count column that serves as the weight of each row.
a <- data.frame(
  Occ = factor(c(1, 1, 2, 2, 3, 3, 4, 5, 5, 5)),
  Type = factor(c("A", "B", "C", "A", "A", "A", "B", "C", "C", "B")),
  Alc = factor(c("A", "B", "N", "A", "N", "N", "N", "A", "B", "B")),
  Count = c(10, 10, 20, 10, 15, 15, 10, 10, 20, 15)
)
a
   Occ Type Alc Count
1    1    A   A    10
2    1    B   B    10
3    2    C   N    20
4    2    A   A    10
5    3    A   N    15
6    3    A   N    15
7    4    B   N    10
8    5    C   A    10
9    5    C   B    20
10   5    B   B    15

# Default summary: row counts per level for the factor columns and
# min/quartiles/mean/max for Count (Count is not used as a weight here).
summary(a)
Occ   Type  Alc       Count     
1:2   A:4   A:3   Min.   :10.0  
2:2   B:3   B:3   1st Qu.:10.0  
3:2   C:3   N:4   Median :12.5  
4:1               Mean   :13.5  
5:3               3rd Qu.:15.0  
                  Max.   :20.0 

但我希望根据每个变量的Count得到百分比摘要,如下所示。

 Occ        Type       Alc       
 1:14.9%    A:37.1%    A:22.2%     
 2:22.2%    B:25.8%    B:33.3%    
 3:22.2%    C:37.1%    N:44.5%   
 4:7.4%              
 5:33.3%    

感谢任何帮助。

3 个答案:

答案 0 :(得分:3)

下面的代码可以作为一个起点,您可能需要稍作修改以满足具体需求。

library(data.table)
dt <- as.data.table(a)

# For each of the first three columns (Occ, Type, Alc): total Count per level,
# then rescale the totals so that they sum to 100.
for (grp_col in names(dt)[1:3]) {
  # Unnamed j expression, so the aggregated column is called V1.
  totals <- dt[, sum(Count), by = grp_col]
  # `:=` marks its result as "do not print"; the trailing [] clears that flag
  # so the explicit print() below is not silently swallowed at top level.
  print(totals[, V1 := 100 * V1 / sum(V1)][])
}
#   Occ        V1
#1:   1 14.814815
#2:   2 22.222222
#3:   3 22.222222
#4:   4  7.407407
#5:   5 33.333333
#   Type       V1
#1:    A 37.03704
#2:    B 25.92593
#3:    C 37.03704
#   Alc       V1
#1:   A 22.22222
#2:   B 33.33333
#3:   N 44.44444

答案 1 :(得分:1)

计算这些值的最佳 base R 函数可能是 xtabs。这里我对它的结果做了一些格式化处理,使其显示为百分比形式的数值。

# Names of the columns of `a` that are factors.
myfactors <- names(a)[vapply(a, is.factor, logical(1))]

# For each factor: Count totals per level, expressed as a percentage of the
# overall Count total and rounded to two decimals. Returns an unnamed list.
lapply(myfactors, function(f) {
  weighted <- xtabs(as.formula(paste0("Count~", f)), a)
  round(weighted / sum(a$Count) * 100, 2)
})

这将生成表格列表

# [[1]]
# Occ
#     1     2     3     4     5 
# 14.81 22.22 22.22  7.41 33.33 
# 
# [[2]]
# Type
#     A     B     C 
# 37.04 25.93 37.04 
# 
# [[3]]
# Alc
#     A     B     N 
# 22.22 33.33 44.44 

答案 2 :(得分:0)

library(Hmisc)  # for wtd.table; library() errors immediately if Hmisc is missing

# Count-weighted frequency table for each of the first three columns,
# converted to a percentage of the overall Count total, rounded to 1 decimal.
# lapply() always returns a named list; nested sapply() would collapse the
# tables into a matrix whenever all factors have the same number of levels.
lapply(a[1:3], function(column) {
  weighted <- wtd.table(column, weights = a$Count, type = "table")
  round(weighted / (sum(a$Count) / 100), 1)
})
$Occ
   1    2    3    4    5 
14.8 22.2 22.2  7.4 33.3 

$Type
   A    B    C 
37.0 25.9 37.0 

$Alc
   A    B    N 
22.2 33.3 44.4 

我已经给另外两个看起来更好的答案点了赞。