将列表的列表转换为 data.frame

时间:2017-08-22 11:25:22

标签: r

我的数据框如下:

library("dplyr")

# Example input: one row per group.
df <- data.frame(
  name = c("group1", "group2"),
  n_success = c(32, 30),
  n = c(122, 123),
  stringsAsFactors = FALSE  # keep `name` as character on R < 4.0 as well
)

对于每个组,我从 Beta 分布中抽取 1000 个样本:

# Draw 1000 samples from Beta(1 + n_success, 1 + n - n_success) for each group.
# rowwise() makes mutate() evaluate rbeta() once per row. Without it, rbeta()
# is called a single time with the parameter columns recycled, and that one
# mixed vector is stored in every row.
df <- df %>%
  rowwise() %>%
  mutate(sims = list(rbeta(1000, 1 + n_success, 1 + n - n_success))) %>%
  ungroup() %>%
  select(name, sims)

# str(df)
# shows, among other details:
# name: chr "group1" "group2"
# sims: List of 2 (one numeric vector of 1000 draws per group)

我现在有一个数据框,其中每一行都包含一个字符串和一个列表。

如何从此处转到列名为“group1”和“group2”的数据框,并且每列是上面观察到的1000个模拟?请注意,组的数量可能非常随意,所以如果我有12个组,我想要12列。

2 个答案:

答案 0 :(得分:2)

使用:

library(dplyr)
library(tidyr)
# Simulate per group, then reshape so that each group becomes one column.
# NOTE: the sample output printed after this snippet shows identical columns,
# which is what happens without rowwise(); with rowwise() each group gets its
# own independent draws.
df %>%
  rowwise() %>%  # evaluate rbeta() once per row, not once for the whole column
  mutate(sims = list(rbeta(1000, 1 + n_success, 1 + n - n_success))) %>%
  ungroup() %>%
  select(name, sims) %>%
  unnest(sims) %>%  # one row per simulated value; column named explicitly
  group_by(name) %>%
  mutate(rn = row_number()) %>%  # within-group index so spread() can align rows
  ungroup() %>%
  spread(name, sims) %>%
  select(-rn)  # drop only the helper index, so any number of groups works

你得到:

      group1    group2
 *     <dbl>     <dbl>
 1 0.2448308 0.2448308
 2 0.2580710 0.2580710
 3 0.2249618 0.2249618
 4 0.2652175 0.2652175
 5 0.3002762 0.3002762
 6 0.1852094 0.1852094
 7 0.2706153 0.2706153
 8 0.2580558 0.2580558
 9 0.2264272 0.2264272
10 0.3198264 0.3198264
# ... with 990 more rows

data.table 包可能更适合所需的转换。使用:

library(data.table)
# Step 1: simulate in long format; `by = name` runs rbeta() separately for
# each group. setDT() turns `df` into a data.table in place.
sims_long <- setDT(df)[, .(sims = rbeta(1000, 1+n_success, 1+n-n_success)),
                       by = name]

# Step 2: cast to wide format, one column per group, rows aligned by the
# within-group row id.
sims_wide <- dcast(sims_long, rowid(name) ~ name, value.var = 'sims')

# Step 3: drop the leftover `name` column (the row-id key from the formula's
# left-hand side); the trailing [] makes the result print after `:=`.
sims_wide[, name := NULL][]

你得到:

         group1    group2
   1: 0.2882302 0.3061312
   2: 0.2615165 0.2763967
   3: 0.2885236 0.2516134
   4: 0.2516337 0.2455496
   5: 0.2635944 0.2267952
  ---                    
 996: 0.2658737 0.2525680
 997: 0.3045952 0.2193125
 998: 0.2505284 0.1967361
 999: 0.2723949 0.2389607
1000: 0.2544297 0.2477589

基础R中的替代方案:

# Draw Beta samples for a single group.
#   x:      a one-row data frame (or list) with elements `n_success` and `n`.
#   n_sims: number of draws; defaults to 1000, the value used originally.
# Returns a numeric vector of length `n_sims`.
f <- function(x, n_sims = 1000) {
  rbeta(n_sims, 1 + x[["n_success"]], 1 + x[["n"]] - x[["n_success"]])
}

# Split into one piece per group name. Setting the factor levels to the order
# of first appearance keeps the result columns in the same order as the rows
# of `df`. Plain split(df, df$name) sorts the names alphabetically, which puts
# e.g. "group10" before "group2" once there are more than nine groups.
lst_1 <- split(df, factor(df$name, levels = unique(df$name)))
lst_2 <- lapply(lst_1, f)

# Bind the per-group vectors side by side; the list names become column names.
do.call(cbind.data.frame, lst_2)

答案 1 :(得分:1)

您也可以继续使用 dplyr/tidyverse。我会这样做:

library(dplyr)
library(tidyr) # for unnest() and spread()

df <- data.frame(
  name = c("group1", "group2"),
  n_success = c(32, 30),
  n = c(122, 123),
  stringsAsFactors = FALSE
)

# Continuing your approach (be aware that I added a list() and closed a
# missing parenthesis). rowwise() is needed so that rbeta() is evaluated once
# per group; otherwise a single vector built from recycled parameters is
# shared by every row. Printed results later in this answer that show
# identical group1 and group2 values reflect the behaviour without rowwise().
df2 <- df %>%
  rowwise() %>%
  mutate(sims = list(rbeta(1000, 1 + n_success, 1 + n - n_success))) %>%
  ungroup() %>%
  select(name, sims)
str(df2)
# Shows two rows: `name` (chr "group1" "group2") and `sims`, a list of 2
# numeric vectors of 1000 draws each, now different for each group.


# Unnest to one row per draw and number the draws within each group, so the
# rows can be lined up when spreading back to wide format.
# - unnest(sims) names the list column explicitly (bare unnest() warns on
#   tidyr >= 1.0).
# - row_number() replaces 1:n(), which misbehaves when n() is 0.
# - ungroup() avoids leaking the grouping into later steps.
df3 <- df2 %>%
  unnest(sims) %>%
  group_by(name) %>%
  mutate(num = row_number()) %>%
  ungroup()
df3
#> # A tibble: 2,000 x 3
#>      name      sims   num
#>     <chr>     <dbl> <int>
#>  1 group1 0.1779776     1
#>  2 group1 0.3134262     2
#>  3 group1 0.2724994     3
#>  4 group1 0.2496521     4
#>  5 group1 0.2714030     5
#>  6 group1 0.2192758     6
#>  7 group1 0.2056501     7
#>  8 group1 0.2210970     8
#>  9 group1 0.2505481     9
#> 10 group1 0.2945622    10
#> # ... with 1,990 more rows

# Reshape back to wide format: `name` supplies the new column names and
# `sims` the cell values, leaving one row per draw index `num`.
df_final <- spread(df3, key = name, value = sims)
df_final
#> # A tibble: 1,000 x 3
#>      num    group1    group2
#>  * <int>     <dbl>     <dbl>
#>  1     1 0.1779776 0.1779776
#>  2     2 0.3134262 0.3134262
#>  3     3 0.2724994 0.2724994
#>  4     4 0.2496521 0.2496521
#>  5     5 0.2714030 0.2714030
#>  6     6 0.2192758 0.2192758
#>  7     7 0.2056501 0.2056501
#>  8     8 0.2210970 0.2210970
#>  9     9 0.2505481 0.2505481
#> 10    10 0.2945622 0.2945622
#> # ... with 990 more rows

如果您不需要 num 这个变量,可以使用 select(df_final, -num) 将其去掉。

这对你有帮助吗?