如何由一个表(table)的列表生成百分比汇总数据框

时间:2019-06-07 12:01:26

标签: r purrr

我的数据是一个由表(table)组成的列表,记录了 6 个类别(random_sequence_generation 等)中 "H"、"L" 和 "U" 出现的频数。

我想生成一个数据框,其中每一行对应一个类别,各列分别是观察到的 "H"、"L" 和 "U" 所占的百分比,即:

random_sequence_generation  6.7 63.3 30.0
allocation_concealment      0.0 43.3 56.7
...                         ...

给出数据:

   # Example input: a named list of one-dimensional frequency tables, one per
   # category. Not every table contains all three levels "H", "L" and "U".
   dat <- list(
     random_sequence_generation = structure(
       c(H = 2L, L = 19L, U = 9L),
       .Dim = 3L,
       .Dimnames = structure(list(c("H", "L", "U")), .Names = ""),
       class = "table"
     ),
     allocation_concealment = structure(
       c(L = 13L, U = 17L),
       .Dim = 2L,
       .Dimnames = structure(list(c("L", "U")), .Names = ""),
       class = "table"
     ),
     blinding_of_participants = structure(
       c(H = 30L),
       .Dim = 1L,
       .Dimnames = structure(list("H"), .Names = ""),
       class = "table"
     ),
     blinding_of_personnel = structure(
       c(H = 28L, U = 2L),
       .Dim = 2L,
       .Dimnames = structure(list(c("H", "U")), .Names = ""),
       class = "table"
     ),
     blinding_of_outcome_assessor = structure(
       c(H = 17L, L = 8L, U = 5L),
       .Dim = 3L,
       .Dimnames = structure(list(c("H", "L", "U")), .Names = ""),
       class = "table"
     ),
     incomplete_outcome_data = structure(
       c(H = 10L, L = 20L),
       .Dim = 2L,
       .Dimnames = structure(list(c("H", "L")), .Names = ""),
       class = "table"
     )
   )

3 个答案:

答案 0 :(得分:4)

使用 sapply 的 base R 方法:
# Percentage of "H", "L" and "U" per category: a matrix with one row per
# element of `dat` and one (unnamed) column per entry of `val`, in that order.
val <- c("H", "L", "U")
t(vapply(dat, function(x) {
  # Start from zeros so that levels missing from a table stay at 0.
  tot <- numeric(length(val))
  # Place each observed level in its slot; multiply by 100 because the
  # summary is expressed in percent rather than as proportions.
  tot[match(names(x), val)] <- 100 * x / sum(x)
  tot
}, numeric(length(val))))

#                                  [,1]    [,2]     [,3]
#random_sequence_generation     6.66667 63.3333 30.00000
#allocation_concealment         0.00000 43.3333 56.66667
#blinding_of_participants     100.00000  0.0000  0.00000
#blinding_of_personnel         93.33333  0.0000  6.66667
#blinding_of_outcome_assessor  56.66667 26.6667 16.66667
#incomplete_outcome_data       33.33333 66.6667  0.00000

我们先创建一个长度为 3 的全零向量,再用 match 按名称找到 x 的各个元素在 val 中的位置,然后把 x 中各元素占 sum(x) 的比例赋到对应位置;未出现的水平保持为 0。感谢 @Rohit 指出之前方法中的问题。

答案 1 :(得分:4)

# Example input: a named list of one-dimensional frequency tables, one per
# category. Not every table contains all three levels "H", "L" and "U".
dat <- list(
  random_sequence_generation = structure(
    c(H = 2L, L = 19L, U = 9L),
    .Dim = 3L,
    .Dimnames = structure(list(c("H", "L", "U")), .Names = ""),
    class = "table"
  ),
  allocation_concealment = structure(
    c(L = 13L, U = 17L),
    .Dim = 2L,
    .Dimnames = structure(list(c("L", "U")), .Names = ""),
    class = "table"
  ),
  blinding_of_participants = structure(
    c(H = 30L),
    .Dim = 1L,
    .Dimnames = structure(list("H"), .Names = ""),
    class = "table"
  ),
  blinding_of_personnel = structure(
    c(H = 28L, U = 2L),
    .Dim = 2L,
    .Dimnames = structure(list(c("H", "U")), .Names = ""),
    class = "table"
  ),
  blinding_of_outcome_assessor = structure(
    c(H = 17L, L = 8L, U = 5L),
    .Dim = 3L,
    .Dimnames = structure(list(c("H", "L", "U")), .Names = ""),
    class = "table"
  ),
  incomplete_outcome_data = structure(
    c(H = 10L, L = 20L),
    .Dim = 2L,
    .Dimnames = structure(list(c("H", "L")), .Names = ""),
    class = "table"
  )
)

# Stack a list of table-like objects into one data frame.
#
# mydata: a list whose elements can each be converted by data.frame().
# Returns whatever plyr::ldply() produces when row-binding the converted
# pieces.
flatten_list <- function(mydata) {
  # Keep label columns as character rather than factor.
  as_frame <- function(piece) data.frame(piece, stringsAsFactors = FALSE)
  frames <- lapply(mydata, as_frame)
  plyr::ldply(frames)
}

# Turn every table into proportions, stack them into one long data frame,
# then pivot to wide form: one row per `.id`, one `Freq.*` column per `Var1`.
res <- flatten_list(lapply(dat, prop.table))
reshape(
  res,
  idvar = ".id",
  timevar = "Var1",
  direction = "wide"
)

这是输出,我不确定它是否正确。对吗?

                            .id     Freq.H    Freq.L     Freq.U
1    random_sequence_generation 0.06666667 0.6333333 0.30000000
4        allocation_concealment         NA 0.4333333 0.56666667
6      blinding_of_participants 1.00000000        NA         NA
7         blinding_of_personnel 0.93333333        NA 0.06666667
9  blinding_of_outcome_assessor 0.56666667 0.2666667 0.16666667
12      incomplete_outcome_data 0.33333333 0.6666667         NA

答案 2 :(得分:1)

下面是一种使用 melt/acast 的方法:

library(reshape2)
# Melt the list of tables to long form, cast it to an L1-by-Var1 matrix
# (missing combinations filled with 0), then turn each row into percentages.
# Note: the aggregation function must be supplied as `fun.aggregate`; an
# argument named `FUN` is absorbed by `...` and not used by acast().
# `value.var` is given explicitly so acast() does not have to guess it.
100 * prop.table(
  acast(
    melt(dat),
    L1 ~ Var1,
    fun.aggregate = sum,
    value.var = "value",
    fill = 0
  ),
  margin = 1
)
#                                   H        L         U
#allocation_concealment         0.000000 43.33333 56.666667
#blinding_of_outcome_assessor  56.666667 26.66667 16.666667
#blinding_of_participants     100.000000  0.00000  0.000000
#blinding_of_personnel         93.333333  0.00000  6.666667
#incomplete_outcome_data       33.333333 66.66667  0.000000
#random_sequence_generation     6.666667 63.33333 30.000000

或者使用 tidyverse 的方案:

library(tidyverse)
# Convert each table to a name/value tibble, stack them with the list names
# in `grp`, rescale `value` to percentages within each `grp`, then spread the
# names into columns, filling missing combinations with 0.
dat %>%
  map(enframe) %>%
  bind_rows(.id = "grp") %>%
  group_by(grp) %>%
  mutate(value = 100 * value / sum(value)) %>%
  spread(name, value, fill = 0)
# A tibble: 6 x 4
# Groups:   grp [6]
#  grp                               H     L     U
#  <chr>                         <dbl> <dbl> <dbl>
#1 allocation_concealment         0     43.3 56.7 
#2 blinding_of_outcome_assessor  56.7   26.7 16.7 
#3 blinding_of_participants     100      0    0   
#4 blinding_of_personnel         93.3    0    6.67
#5 incomplete_outcome_data       33.3   66.7  0   
#6 random_sequence_generation     6.67  63.3 30