我有一个数据框,其中一列的值是以字符串形式保存的向量。现在我需要把这些向量展开,从而把这个数据框拉长。
看一下这个例子:
# Example input: `value` holds R expressions stored as text; some of them
# (e.g. "c(1,3,4)", "1:5") stand for more than one element.
df <- structure(
  list(
    id = rep(c(1, 2), each = 5),
    id2 = rep(c(1, 2, 3, 4, 5), times = 2),
    value = c(
      "1", "2", "3", "c(1,3,4)", "1:5",
      "1", "2", "3", "1:4", "2:4"
    )
  ),
  names = c("id", "id2", "value"),
  row.names = c(NA, 10L),
  class = "data.frame"
)
df
id id2 value
1 1 1 1
2 1 2 2
3 1 3 3
4 1 4 c(1,3,4)
5 1 5 1:5
6 2 1 1
7 2 2 2
8 2 3 3
9 2 4 1:4
10 2 5 2:4
需要把它转换成下面这样:
# Expected result: every element of an expanded `value` gets its own row,
# with `id` and `id2` repeated accordingly. All columns are integer.
df2 <- structure(
  list(
    id = rep(1:2, times = c(11L, 10L)),
    id2 = c(
      rep(1:5, times = c(1L, 1L, 1L, 3L, 5L)),
      rep(1:5, times = c(1L, 1L, 1L, 4L, 3L))
    ),
    value = c(
      1:3, c(1L, 3L, 4L), 1:5,
      1:3, 1:4, 2:4
    )
  ),
  names = c("id", "id2", "value"),
  class = "data.frame",
  row.names = c(NA, -21L)
)
df2
id id2 value
1 1 1 1
2 1 2 2
3 1 3 3
4 1 4 1
5 1 4 3
6 1 4 4
7 1 5 1
8 1 5 2
9 1 5 3
10 1 5 4
11 1 5 5
12 2 1 1
13 2 2 2
14 2 3 3
15 2 4 1
16 2 4 2
17 2 4 3
18 2 4 4
19 2 5 2
20 2 5 3
21 2 5 4
所以这是一种从宽格式到长格式的转换,但与我们习惯的做法(例如使用 reshape2 包)不同。有没有人有解决这个问题的方法?
*编辑
我忘了提到一些值实际上是这样的字符值:
# Edited example input: besides expressions such as "c(1,3,4)" and "1:5",
# `value` also contains plain words ("Hi", "hello") that are not R code.
df <- structure(
  list(
    id = rep(c(1, 2), each = 5),
    id2 = rep(c(1, 2, 3, 4, 5), times = 2),
    value = c(
      "1", "2", "Hi", "c(1,3,4)", "1:5",
      "1", "2", "hello", "1:4", "2:4"
    )
  ),
  names = c("id", "id2", "value"),
  row.names = c(NA, 10L),
  class = "data.frame"
)
df
id id2 value
1 1 1 1
2 1 2 2
3 1 3 Hi
4 1 4 c(1,3,4)
5 1 5 1:5
6 2 1 1
7 2 2 2
8 2 3 hello
9 2 4 1:4
10 2 5 2:4
答案 0 :(得分:6)
同样使用data.table
# Load data.table for setDT() and the DT[i, j, by] grouping syntax.
library(data.table)
# For each (id, id2) group, parse the `value` string as R code, evaluate it,
# and coerce the result to integer; a multi-element result yields one output
# row per element. The unnamed result column is shown as V1 in the output below.
# NOTE(review): evaluating parsed text runs whatever code the strings contain,
# and a plain word such as "Hi" is evaluated as a variable name -- presumably
# an error unless such an object exists; confirm before using on mixed data.
setDT(df)[, as.integer(eval.parent(parse(text = value))), by = .(id, id2)]
# id id2 V1
# 1: 1 1 1
# 2: 1 2 2
# 3: 1 3 3
# 4: 1 4 1
# 5: 1 4 3
# 6: 1 4 4
# 7: 1 5 1
# 8: 1 5 2
# 9: 1 5 3
# 10: 1 5 4
# 11: 1 5 5
# 12: 2 1 1
# 13: 2 2 2
# 14: 2 3 3
# 15: 2 4 1
# 16: 2 4 2
# 17: 2 4 3
# 18: 2 4 4
# 19: 2 5 2
# 20: 2 5 3
# 21: 2 5 4
答案 1 :(得分:3)
你可以这样做:
# Turn `value` into a list column: strings that are valid R expressions
# yielding an atomic vector (e.g. "c(1,3,4)", "1:5") are expanded to their
# elements as character; everything else (e.g. "Hi") is kept unchanged.
# NOTE: eval(parse()) executes whatever code the strings contain -- only use
# this on data you trust.
df$value <- lapply(df$value, function(x) {
  res <- tryCatch(
    # Evaluate in a scratch environment on top of base so that workspace
    # variables that happen to share a name with a value are not substituted.
    eval(parse(text = x), envir = new.env(parent = baseenv())),
    error = function(e) NULL
  )
  # Fall back to the original string when evaluation failed or produced
  # something that cannot become rows (NULL, a function such as `c`, ...).
  if (is.atomic(res) && length(res) > 0L) as.character(res) else x
})
# install.packages("tidyr") # uncomment and run if needed
# `cols` is required since tidyr 1.0.0; one output row per list element.
tidyr::unnest(df, cols = value)
# Source: local data frame [21 x 3]
#
# id id2 value
# (dbl) (dbl) (chr)
# 1 1 1 1
# 2 1 2 2
# 3 1 3 Hi
# 4 1 4 1
# 5 1 4 3
# 6 1 4 4
# 7 1 5 1
# 8 1 5 2
# 9 1 5 3
# 10 1 5 4
# .. ... ... ...
但是,这可能不是最优雅的选择。