我有一个名为 df 的数据框,其中有一列是列表列(list column)。如何根据这一列表列创建新的列?
我的数据框。
Policy Item
Checked list(Processed = "Valid", Gmail = "yy@gmail", Information = list(list(Descrption = "T1, R1", VID = "YUY")))
Sample list(Processed = "Valid", Gmail = "tt@gmail", Information = list(list(Descrption = "D3, Y3", VID = "RT")))
Processed list(Processed = "Valid", Gmail = "pp@gmail", Information = list(list(Descrption = "Y2, LE", VID = "UIU")))
我期望得到的数据框:
Policy Processed Gmail Descrption VID
Checked Valid yy@gmail "T1,R1" "YUY"
Sample Valid tt@gmail "D3,Y3" "RT"
Processed Valid pp@gmail "Y2,LE" "UIU"
我尝试用下面的代码来得到期望的数据框。
# Return NA in place of NULL; any other value is passed through unchanged.
na_if_null <- function(x) {
  if (!is.null(x)) {
    return(x)
  }
  NA
}
# For every list column of `df`, build a named list of new columns: one per
# distinct element name found among that column's entries.
new_cols <- lapply(
# Filter(is.list, df) keeps only the columns of `df` that are lists.
Filter(is.list, df),
function(list_col) {
# Distinct names across all entries; setNames(nm = ...) names the vector by
# its own values, so the result of the lapply() below is named accordingly.
names_ <- setNames(nm = unique(do.call(c, lapply(list_col, names))))
# For each name, pull that element out of every entry (NA when the entry
# lacks it) and trim surrounding whitespace.
# NOTE(review): only the top-level names of each entry are visited, so a
# nested list such as `Information` is not expanded by this code.
lapply(names_, function(name) sapply(list_col, function(x)
trimws(na_if_null(as.list(x)[[name]]))))
}
)
# Combine the original columns and the newly extracted columns into a single
# data frame by calling data.frame() on an assembled argument list.
res <- do.call(
data.frame,
c(
# `df` itself plus the data.frame() options come first ...
list(df, check.names = FALSE, stringsAsFactors = FALSE),
# ... followed by the per-list-column results, flattened one level with c().
do.call(c, new_cols)
)
)
但是我得到的却是下面这个数据框。请帮我得到预期的输出。
Policy Item Item.Processed Item.Gmail Item.Information
Checked list(Processed = "Valid", Gmail = "yy@gmail", Information = list(list(Descrption = "T1, R1", VID = "YUY"))) Processed yy@gmail list(Descrption = "T1, R1", VID = "YUY")
Sample list(Processed = "Valid", Gmail = "tt@gmail", Information = list(list(Descrption = "D3, Y3", VID = "RT"))) Processed tt@gmail list(Descrption = "D3, Y3", VID = "RT")
Processed list(Processed = "Valid", Gmail = "pp@gmail", Information = list(list(Descrption = "Y2, LE", VID = "UIU"))) Processed pp@gmail list(Descrption = "Y2, LE", VID = "UIU")
dput
# dput() output of the example data frame: `Policy` is a character vector and
# `Item` is a list of one-row data frames. Each of those has a list column
# `Information` that holds another one-row data frame (`Descrption`, `VID`).
structure(list(Policy = c("Checked", "Sample", "Processed"), Item = list(
structure(list(Processed = "Valid", Gmail = "yy@gmail", Information = list(
structure(list(Descrption = "T1, R1", VID = "YUY"), .Names = c("Descrption",
"VID"), class = "data.frame", row.names = 1L))), .Names = c("Processed",
"Gmail", "Information"), class = "data.frame", row.names = 1L),
structure(list(Processed = "Valid", Gmail = "tt@gmail", Information = list(
structure(list(Descrption = "D3, Y3", VID = "RT"), .Names = c("Descrption",
"VID"), class = "data.frame", row.names = 1L))), .Names = c("Processed",
"Gmail", "Information"), class = "data.frame", row.names = 1L),
structure(list(Processed = "Valid", Gmail = "pp@gmail", Information = list(
structure(list(Descrption = "Y2, LE", VID = "UIU"), .Names = c("Descrption",
"VID"), class = "data.frame", row.names = 1L))), .Names = c("Processed",
"Gmail", "Information"), class = "data.frame", row.names = 1L))), row.names = c(NA,
3L), class = "data.frame", .Names = c("Policy", "Item"))
示例数据框
Policy colval Item
Checked list(PID="4",Bdetail ="ui,89") list(Processed = "Valid", Gmail = "yy@gmail", Information = list(list(Descrption = "T1, R1", VID = "YUY")))
Sample list(PID="7",Bdetail ="ju,78") list(Processed = "Valid", Gmail = "tt@gmail", Information = list(list(Descrption = "D3, Y3", VID = "RT")))
Processed list(PID ="8",Bdetail ="nj,45") list(Processed = "Valid", Gmail = "pp@gmail", Information = list(list(Descrption = "Y2, LE", VID = "UIU")))
答案 0 :(得分:0)
这里是基础R的解决方案:
# Flatten every entry of the `Item` list column into a vector, stack those
# vectors as rows, and put the `Policy` values in front as the first column.
# The result is a character matrix, not a data frame.
dd <- local({
  item_rows <- lapply(
    seq_len(nrow(dx)),
    function(row_idx) unlist(dx$Item[row_idx])
  )
  cbind(dx$Policy, do.call(rbind, item_rows))
})
# Label the columns with the target names.
colnames(dd) <- c(
  "Policy",
  "Processed",
  "Gmail",
  "Descrption",
  "VID"
)
dd
# Policy Processed Gmail Descrption VID
# [1,] "Checked" "Valid" "yy@gmail" "T1, R1" "YUY"
# [2,] "Sample" "Valid" "tt@gmail" "D3, Y3" "RT"
# [3,] "Processed" "Valid" "pp@gmail" "Y2, LE" "UIU"
基本上,我对每个项目使用 unlist,然后用经典的 do.call(rbind, llist) 把它们按行合并起来。
如果您想要使用与原始子列表相同的名称,您可以执行以下操作:
# Alternative to hard-coding the names: keep "Policy" for the first column and
# strip everything up to the last dot from the remaining column names.
colnames(dd) <- c("Policy",gsub(".*[.]","",colnames(dd)[-1]))
# data.table variant: setDT() converts `dx` to a data.table in place, then each
# `Policy` group has its list-column entries flattened into a one-row table and
# the rows are stacked with rbindlist().
library(data.table)
setDT(dx)
dx[,
  rbindlist(lapply(.SD, function(item) data.table(t(unlist(item))))),
  by = Policy
]
答案 1 :(得分:0)
使用 tidyr 中的 unnest 可以轻松完成:
library(dplyr)
library(tidyr)
# Expand one nesting level per unnest() call, naming the list column each time.
# A bare unnest() expands every list column in older tidyr, but is deprecated
# since tidyr 1.0.0 and emits a warning there; naming the column works in both.
df %>%
  unnest(Item) %>%
  unnest(Information)
**结果:**
Policy Processed Gmail Descrption VID
1 Checked Valid yy@gmail T1, R1 YUY
2 Sample Valid tt@gmail D3, Y3 RT
3 Processed Valid pp@gmail Y2, LE UIU
**数据:**
# Reproducible example data: `Policy` is a character column and `Item` is a
# list column whose entries are one-row data frames. Each entry carries its own
# list column, `Information`, holding another one-row data frame.
df <- local({
  # Turn a named list of columns into a one-row data frame.
  one_row <- function(cols) {
    structure(cols, class = "data.frame", row.names = 1L)
  }
  # Build a single `Item` entry; `Processed` is "Valid" for every row.
  make_item <- function(gmail, descrption, vid) {
    one_row(list(
      Processed = "Valid",
      Gmail = gmail,
      Information = list(one_row(list(Descrption = descrption, VID = vid)))
    ))
  }
  structure(
    list(
      Policy = c("Checked", "Sample", "Processed"),
      Item = list(
        make_item("yy@gmail", "T1, R1", "YUY"),
        make_item("tt@gmail", "D3, Y3", "RT"),
        make_item("pp@gmail", "Y2, LE", "UIU")
      )
    ),
    class = "data.frame",
    row.names = c(NA, 3L)
  )
})
**注意:**
注意我使用了两次 unnest,因为原始数据框中有两个级别的列表。unnest 会自动展平数据框中的所有列表列并沿用其中的名称,但它不会递归执行,因此列表嵌套了多少层,就必须调用多少次 unnest。