# Example input: one row per case. `n` is how many times the row should be
# repeated, `survive` is how many of those copies should be flagged 1, and
# `a` / `b` are 0/1 columns carried along unchanged.
df <- data.frame(
  n = c(3, 2, 2),
  survive = c(2, 1, 2),
  a = c(1, 1, 0),
  b = c(0, 0, 1)
)
如何展开上面 data.frame 的最后两列,使每一行按“n”列指定的次数重复出现,并且第二列“survive”根据其数值
变为二进制值 0/1?换句话说,从:
n survive a b
3 2 1 0
2 1 1 0
2 2 0 1
变为:
survive a b
1 1 0
1 1 0
0 1 0
1 1 0
0 1 0
1 0 1
1 0 1
答案 0 :(得分:3)
使用 splitstackshape 的 expandRows 和 dplyr:
library(splitstackshape)
library(dplyr)
# Expand each row of `df` `n` times, then turn `survive` into a 0/1 flag:
# within every source row, the first `survive` copies get 1 and the rest 0.
df %>%
  # Tag each source row so its copies can be grouped after expansion. Grouping
  # on runs of equal `survive` values would merge adjacent source rows that
  # happen to share the same count.
  mutate(rid = row_number()) %>%
  expandRows("n") %>%
  group_by(rid) %>%
  # row_number() restarts at 1 in every group, so the comparison yields
  # `survive` ones followed by zeros, however many zeros are needed.
  mutate(survive = as.numeric(row_number() <= survive)) %>%
  ungroup() %>%
  select(-rid)
# A tibble: 7 × 3
# survive a b
# <dbl> <dbl> <dbl>
#1 1 1 0
#2 1 1 0
#3 0 1 0
#4 1 1 0
#5 0 1 0
#6 1 0 1
#7 1 0 1
答案 1 :(得分:3)
我们可以使用base R
# Repeat row i of `df` n[i] times, keeping only the columns after the first
# two (`n` and `survive`); seq_len() stays valid for a zero-row input.
df2 <- df[rep(seq_len(nrow(df)), df$n), -(1:2)]
# Drop the "1", "1.1", ... row names produced by repeated indexing.
row.names(df2) <- NULL
# For every source row build `survive` ones followed by `n - survive` zeros,
# concatenate the pieces, and put the result in front as `Survive`.
df2 <- cbind(
  Survive = unlist(Map(
    function(total, alive) rep(c(1, 0), c(alive, total - alive)),
    df$n, df$survive
  )),
  df2
)
df2
# Survive a b
#1 1 1 0
#2 1 1 0
#3 0 1 0
#4 1 1 0
#5 0 1 0
#6 1 0 1
#7 1 0 1
或者采用更加向量化的方法:
# Repeat row i of `df` n[i] times and drop the first two columns.
df1 <- df[rep(seq_len(nrow(df)), times = df$n), -(1:2)]
# rbind() interleaves the counts as survive_1, n_1 - survive_1, survive_2, ...
# which lines up with the repeated 1, 0, 1, 0, ... pattern being expanded.
df1$survive <- rep(
  rep(c(1, 0), times = nrow(df)),
  times = c(rbind(df$survive, df$n - df$survive))
)
答案 2 :(得分:3)
几种替代解决方案:
1)使用 base R:
# Row index of `df` with index i repeated n[i] times. seq_len() keeps this
# well-defined for a zero-row `df`, where 1:nrow(df) would be c(1, 0).
rn <- rep(seq_len(nrow(df)), df$n)
df2 <- df[rn, ]
# ave() numbers the copies of each source row 1, 2, ...; a copy is flagged 1
# while its position is still within that row's `survive` count.
df2$survive <- as.integer(df2$survive >= ave(rn, rn, FUN = seq_along))
给出:
> df2[,-1]
survive a b
1: 1 1 0
2: 1 1 0
3: 0 1 0
4: 1 1 0
5: 0 1 0
6: 1 0 1
7: 1 0 1
2)使用 data.table -package:
library(data.table)
# setDT() converts `df` to a data.table by reference and `rid := .I` adds a
# row id to it in place; only the derived result has `rid` removed again.
df2 <- setDT(df)[, rid := .I][
  # One group per source row: `survive` ones then `n - survive` zeros, with
  # the length-one `a` and `b` recycled alongside.
  , .(survive = rep(c(1, 0), times = c(survive, n - survive)), a, b),
  by = rid
][, rid := NULL][]
给出:
> df2
survive a b
1: 1 1 0
2: 1 1 0
3: 0 1 0
4: 1 1 0
5: 0 1 0
6: 1 0 1
7: 1 0 1
或者更短一些:
# Same expansion without a helper id column: group by row position directly.
# `by = 1:nrow(df)` is kept verbatim because the grouping column it creates
# is the one removed afterwards under the name `nrow`.
df2 <- setDT(df)[
  , .(survive = rep(c(1, 0), times = c(survive, n - survive)), a, b),
  by = 1:nrow(df)
][, nrow := NULL]
3)使用 dplyr -package:
library(dplyr)
# Expand each row `n` times and recode `survive` as 0/1 using dplyr only.
df %>%
  mutate(rid = row_number()) %>%
  # Repeat row i n[i] times using the piped data itself, not the global `df`,
  # so the chain stays correct for any input (including zero rows).
  slice(rep(rid, n)) %>%
  group_by(rid) %>%
  # `survive` and `n` are constant within a group; use their first value to
  # build `survive` ones followed by `n - survive` zeros.
  mutate(survive = rep(c(1, 0), c(survive[1], n[1] - survive[1]))) %>%
  ungroup() %>%
  select(-n, -rid)
给出:
# A tibble: 7 × 3
survive a b
<dbl> <dbl> <dbl>
1 1 1 0
2 1 1 0
3 0 1 0
4 1 1 0
5 0 1 0
6 1 0 1
7 1 0 1
使用过的数据:
# Data used by the snippets above: `n` is the repeat count per row, `survive`
# the number of copies to flag 1, and `a` / `b` are 0/1 columns kept as-is.
df <- data.frame(
  n = c(3, 2, 2),
  survive = c(2, 1, 2),
  a = c(1, 1, 0),
  b = c(0, 0, 1)
)
答案 3 :(得分:2)
以下是在基础R中使用拆分/应用/组合方法的解决方案:
# Split `df` into one-row pieces, expand each piece on its own, then stack
# the expanded pieces back together with rbind().
df2 <- do.call(rbind, lapply(split(df, seq_along(df$n)), function(piece) {
  # `survive` ones followed by `n - survive` zeros for this source row.
  flags <- rep(c(1, 0), times = c(piece$survive, piece$n - piece$survive))
  # Repeat the single row `n` times, keeping only `a` and `b`.
  cbind(survive = flags, piece[rep(1, piece$n), c("a", "b")])
}))
结果:
survive a b
1.1 1 1 0
1.1.1 1 1 0
1.1.2 0 1 0
2.2 1 1 0
2.2.1 0 1 0
3.3 1 0 1
3.3.1 1 0 1