我的数据框如下:
library("dplyr")
# Example input: one row per group.
# `n_success` and `n` are the counts that parameterise the per-group
# Beta(1 + n_success, 1 + n - n_success) draws taken further down.
df <- data.frame(
  name = c("group1", "group2"),
  n_success = c(32, 30),
  n = c(122, 123),
  stringsAsFactors = FALSE  # keep `name` as character on R < 4.0 too
)
对于每个小组,我从 Beta 分布中抽取 1000 个样本:
# Draw 1000 samples per group from Beta(1 + n_success, 1 + n - n_success) and
# store them in a list column (one numeric vector per row).
# Map() calls rbeta() once per row. A single rbeta() call wrapped in list()
# would recycle both groups' shape parameters inside one vector and then
# store that same vector in every row.
df <- df %>%
  mutate(
    sims = Map(
      function(shape1, shape2) rbeta(1000, shape1, shape2),
      1 + n_success,
      1 + n - n_success
    )
  ) %>%
  select(name, sims)
# str(df)
# prints out:
# name: chr "group1" "group2"
# sims: List of 2 (one numeric vector of 1000 draws per group)
我现在有一个数据框,其中每一行都包含一个字符串和一个列表。
如何从这里得到一个列名为“group1”和“group2”的数据框,使每一列都是上面得到的 1000 个模拟值?请注意,组的数量可以是任意的,所以如果我有 12 个组,我就想要 12 列。
答案 0 :(得分:2)
使用:
library(dplyr)
library(tidyr)
# Draw 1000 Beta samples per group and reshape them to one column per group.
df %>%
  # Map() calls rbeta() once per row, so each group is sampled from its own
  # Beta(1 + n_success, 1 + n - n_success). A single vectorised rbeta() call
  # wrapped in list() would give every group the same mixed vector.
  mutate(
    sims = Map(
      function(shape1, shape2) rbeta(1000, shape1, shape2),
      1 + n_success,
      1 + n - n_success
    )
  ) %>%
  select(name, sims) %>%
  unnest(sims) %>%  # long form: one row per (group, draw)
  group_by(name) %>%
  mutate(rn = row_number()) %>%  # draw index within the group = row key
  ungroup() %>%
  pivot_wider(names_from = name, values_from = sims) %>%
  select(-rn)  # drop only the key, so any number of group columns is kept
你得到:
      group1    group2
 *     <dbl>     <dbl>
 1 0.2448308 0.2448308
 2 0.2580710 0.2580710
 3 0.2249618 0.2249618
 4 0.2652175 0.2652175
 5 0.3002762 0.3002762
 6 0.1852094 0.1852094
 7 0.2706153 0.2706153
 8 0.2580558 0.2580558
 9 0.2264272 0.2264272
10 0.3198264 0.3198264
# ... with 990 more rows
注意:这份示例输出中 group1 与 group2 两列的数值完全相同,说明两组共用了同一份抽样结果;如果各组分别独立抽样,两列的数值应当不同。
data.table 包可能更适合所需的转换。使用:
library(data.table)
# Step 1: simulate in long form. `by = name` evaluates rbeta() separately
# for each group, giving 1000 rows per group.
sims_long <- setDT(df)[, .(sims = rbeta(1000, 1 + n_success, 1 + n - n_success)),
                       by = name]
# Step 2: cast to wide form. rowid(name) numbers the draws within each group
# and acts as the row key; the key column (called `name` in the result) is
# dropped afterwards, and the trailing [] prints the table.
sims_wide <- dcast(sims_long, rowid(name) ~ name, value.var = "sims")
sims_wide[, name := NULL][]
你得到:
         group1    group2
   1: 0.2882302 0.3061312
   2: 0.2615165 0.2763967
   3: 0.2885236 0.2516134
   4: 0.2516337 0.2455496
   5: 0.2635944 0.2267952
  ---
 996: 0.2658737 0.2525680
 997: 0.3045952 0.2193125
 998: 0.2505284 0.1967361
 999: 0.2723949 0.2389607
1000: 0.2544297 0.2477589
基础R中的替代方案:
# Draw 1000 samples from Beta(1 + n_success, 1 + n - n_success) for one group.
# `x` is any object that [[ ]] can index by "n_success" and "n"
# (for example a one-row data frame or a list).
f <- function(x) {
  successes <- x[["n_success"]]
  trials <- x[["n"]]
  shape1 <- 1 + successes
  shape2 <- 1 + trials - successes
  rbeta(1000, shape1, shape2)
}
# Split the data frame into one piece per distinct `name`, simulate each
# piece with f(), then bind the resulting vectors column-wise. The list
# names produced by split() become the column names.
sims_by_group <- lapply(split(df, df$name), f)
do.call(cbind.data.frame, sims_by_group)
答案 1 :(得分:1)
您也可以继续使用 dplyr 和 tidyverse。我会这样做:
library(dplyr)
library(tidyr) # for unnest() and spread()
# Example input: one row per group.
# `n_success` and `n` are the counts that parameterise the per-group
# Beta(1 + n_success, 1 + n - n_success) draws taken below.
df <- data.frame(
  name = c("group1", "group2"),
  n_success = c(32, 30),
  n = c(122, 123),
  stringsAsFactors = FALSE  # keep `name` as character on R < 4.0 too
)
# Continuing your approach, but sampling once per group. rbeta() is
# vectorised over its shape arguments, so one call wrapped in list() would
# recycle both groups' parameters inside a single vector and store that same
# vector in every row. Map() calls rbeta() separately for each row instead.
df2 <- df %>%
  mutate(
    sims = Map(
      function(shape1, shape2) rbeta(1000, shape1, shape2),
      1 + n_success,
      1 + n - n_success
    )
  ) %>%
  select(name, sims)
str(df2)
#> 'data.frame': 2 obs. of 2 variables:
#> $ name: chr "group1" "group2"
#> $ sims:List of 2
#> ..$ : num 0.178 0.313 0.272 0.25 0.271 ...
#> ..$ : num 0.178 0.313 0.272 0.25 0.271 ...
# NOTE: the output above was recorded with a single shared rbeta() call,
# which is why both list elements print the same values. With per-group
# sampling the two elements differ.
# Unnest the list column into long form (one row per draw) and number the
# draws within each group. `num` labels the rows so the data can be reshaped
# back to wide form afterwards.
# The column is passed to unnest() explicitly: calling unnest() without
# `cols` is deprecated since tidyr 1.0.0.
# The result stays grouped by `name`, as the printed header below shows.
df3 <- df2 %>%
  unnest(sims) %>%
  group_by(name) %>%
  mutate(num = row_number())
df3
#> # A tibble: 2,000 x 3
#> # Groups: name [2]
#> name sims num
#> <chr> <dbl> <int>
#> 1 group1 0.1779776 1
#> 2 group1 0.3134262 2
#> 3 group1 0.2724994 3
#> 4 group1 0.2496521 4
#> 5 group1 0.2714030 5
#> 6 group1 0.2192758 6
#> 7 group1 0.2056501 7
#> 8 group1 0.2210970 8
#> 9 group1 0.2505481 9
#> 10 group1 0.2945622 10
#> # ... with 1,990 more rows
# Reshape back to wide form: one row per draw index `num`, one column per
# group. The grouping on `name` is dropped first, because `name` stops being
# a column once its values become the column headers. pivot_wider() is the
# current tidyr replacement for the superseded spread().
df_final <- df3 %>%
  ungroup() %>%
  pivot_wider(names_from = name, values_from = sims)
df_final
#> # A tibble: 1,000 x 3
#> num group1 group2
#> * <int> <dbl> <dbl>
#> 1 1 0.1779776 0.1779776
#> 2 2 0.3134262 0.3134262
#> 3 3 0.2724994 0.2724994
#> 4 4 0.2496521 0.2496521
#> 5 5 0.2714030 0.2714030
#> 6 6 0.2192758 0.2192758
#> 7 7 0.2056501 0.2056501
#> 8 8 0.2210970 0.2210970
#> 9 9 0.2505481 0.2505481
#> 10 10 0.2945622 0.2945622
#> # ... with 990 more rows
# NOTE: in this recorded output group1 and group2 are identical because both
# rows of df2 held the same simulated vector. With independent per-group
# draws the two columns differ.
如果您不需要 num 这个变量,可以用 select(df_final, -num) 将其去掉。
这对你有帮助吗?