我有以下数据框
# Sample data: each row holds a group, a value, and a comma-separated
# string of entries in `list`.
group <- c("cat", "dog", "horse")
value <- c("1", "2", "3")
list <- c(
  "siamese,burmese,balinese",
  "corgi,sheltie,collie",
  "arabian,friesian,andalusian"
)
df <- data.frame(group, value, list)
df
group value list
1 cat 1 siamese,burmese,balinese
2 dog 2 corgi,sheltie,collie
3 horse 3 arabian,friesian,andalusian
并且正在尝试实现这一目标:
group value list
1 cat 1 siamese
2 cat 1 burmese
3 cat 1 balinese
4 dog 2 corgi
5 dog 2 sheltie
6 dog 2 collie
7 horse 3 arabian
8 horse 3 friesian
9 horse 3 andalusian
我知道如何重塑(reshape)一个数据框,但是现在我意识到,我不知道如何把逗号分隔的字符串“展开”成多行。
答案 0 :(得分:1)
# Build the example data frame; strings are kept as character (not factor)
# so the `list` column can be split with string functions later.
xdf <- data.frame(
  group = c("cat", "dog", "horse"),
  value = c("1", "2", "3"),
  list = c(
    "siamese,burmese,balinese",
    "corgi,sheltie,collie",
    "arabian,friesian,andalusian"
  ),
  stringsAsFactors = FALSE
)
tidyverse:
# Split the comma-separated `list` column of `xdf` into one row per entry.
tidyr::separate_rows(xdf, list, sep=",")
## group value list
## 1 cat 1 siamese
## 2 cat 1 burmese
## 3 cat 1 balinese
## 4 dog 2 corgi
## 5 dog 2 sheltie
## 6 dog 2 collie
## 7 horse 3 arabian
## 8 horse 3 friesian
## 9 horse 3 andalusian
基础 R(base R):
# Expand each row of `xdf` into one row per comma-separated entry of its
# `list` column, then stack the per-row data frames into a single one.
do.call(
  rbind.data.frame,
  # seq_len() rather than 1:nrow(): for a zero-row `xdf`, 1:0 would iterate
  # over c(1, 0) and try to index rows that do not exist.
  lapply(seq_len(nrow(xdf)), function(idx) {
    data.frame(
      group = xdf[idx, "group"],
      value = xdf[idx, "value"],
      # strsplit() returns a list; [[1]] extracts the pieces for this row.
      # group/value are length 1 and get recycled to match.
      list = strsplit(xdf[idx, "list"], ",")[[1]],
      stringsAsFactors = FALSE
    )
  })
)
## group value list
## 1 cat 1 siamese
## 2 cat 1 burmese
## 3 cat 1 balinese
## 4 dog 2 corgi
## 5 dog 2 sheltie
## 6 dog 2 collie
## 7 horse 3 arabian
## 8 horse 3 friesian
## 9 horse 3 andalusian
性能对比(基准测试):
# Benchmark three ways of splitting the comma-separated `list` column of
# `xdf` into one row per entry: tidyr::unnest, tidyr::separate_rows, and a
# base R do.call/lapply approach.
# NOTE(review): `%>%` is not base R; this assumes a package providing it
# (e.g. magrittr or dplyr) is already attached -- confirm before running.
microbenchmark::microbenchmark(
  unnest = transform(xdf, list = strsplit(list, ",")) %>%
    tidyr::unnest(list),
  separate_rows = tidyr::separate_rows(xdf, list, sep = ","),
  base = do.call(
    rbind.data.frame,
    # seq_len() rather than 1:nrow() so a zero-row `xdf` yields no
    # iterations instead of iterating over c(1, 0).
    lapply(seq_len(nrow(xdf)), function(idx) {
      data.frame(
        group = xdf[idx, "group"],
        value = xdf[idx, "value"],
        list = strsplit(xdf[idx, "list"], ",")[[1]],
        stringsAsFactors = FALSE
      )
    })
  )
)
## Unit: microseconds
## expr min lq mean median uq max neval
## unnest 3689.890 4280.7045 6326.231 4881.160 6428.508 16670.715 100
## separate_rows 5093.618 5602.2510 8479.712 6289.193 10352.847 24447.528 100
## base 872.343 975.1615 1589.915 1099.391 1660.324 6663.132 100
tidyr 操作的糟糕性能一直让我感到惊讶。
答案 1 :(得分:0)
如果我理解正确(IIUC),R 中(tidyr 包)有 unnest 可以用:
library(dplyr)
library(tidyr)
# Rebuild the data frame from the `group`, `value` and `list` vectors,
# keeping strings as character so strsplit() can operate on `list`.
# FALSE is spelled out because `F` is an ordinary, reassignable variable.
df <- data.frame(group, value, list, stringsAsFactors = FALSE)
# Turn `list` into a list-column of split pieces, then unnest it so each
# piece gets its own row.
df %>%
  transform(list = strsplit(list, ",")) %>%
  unnest(list)
group value list
1 cat 1 siamese
2 cat 1 burmese
3 cat 1 balinese
4 dog 2 corgi
5 dog 2 sheltie
6 dog 2 collie
7 horse 3 arabian
8 horse 3 friesian
9 horse 3 andalusian