我需要根据数据框中的列表列添加新列,而且这样的列表列可能不止一列。(如果某列的元素是列表,就把列表中的每个元素展开为新列;否则保持该列不变)
我的数据框,
U_ID Value AD CT value1 Citycode
1 list(`Cno`="50",`cna`="\n\rjhon\n") ia BG list(`Cno`="50") TY
1 list(`Cno`="20",`cna`="guna") AS DB list(`Cno`="\n\r20") UI
2 list(`Cno`="30",`cna`="rt",`cf`="ty") BN FV list(`Cno`="30") GH
2 NULL VF TY NULL TY
3 list(`Cno`="\n\r30") RR TT list(`Cno`="30") ST
我期望的输出是:
U_ID Value Cno cna cf AD CT value1 Cno1 Citycode
1 list(`Cno`="50",`cna`="\n\rjhon\n") 50 jhon NULL ia BG list(`Cno1`="50") 50 TY
1 list(`Cno`="20",`cna`="guna") 20 guna NULL AS DB list(`Cno1`="\n\r20") 20 UI
2 list(`Cno`="30",`cna`="rt",`cf`="ty") 30 rt ty BN FV list(`Cno1`="30") 30 GH
2 NULL NULL NULL NULL VF TY NULL NULL TY
3 list(`Cno`="\n\r30") 30 NULL NULL RR TT list(`Cno1`="30") 30 ST
数据,
# Example data frame: 5 rows with columns U_ID, Value, AD, CT, Value1 and
# Citycode. `Value` and `Value1` are list columns; each cell is either a named
# list of strings or the placeholder string "NULL" (not a real NULL).
structure(
  list(
    U_ID = c(1, 1, 2, 2, 3),
    Value = list(
      list(`Cno#` = "50", cna = "\n\rjhon\n"),
      list(`Cno#` = "50", cna = "guna"),
      list(`Cno#` = "30", cna = "rt", cf = "ty"),
      "NULL",
      list(`Cno#` = "\n\r30")
    ),
    AD = c("ia", "AS", "BN", "VF", "RR"),
    CT = c("BG", "DB", "FV", "TY", "TT"),
    Value1 = list(
      list(`Cno#` = "50"),
      list(`Cno#` = "\n\r20"),
      list(`Cno#` = "30"),
      "NULL",
      list(`Cno#` = "30")
    ),
    Citycode = c("TY", "UI", "GH", "RY", "ST")
  ),
  # Compact row names: five automatically numbered rows.
  row.names = c(NA, -5L),
  class = "data.frame"
)
答案 0 :(得分:1)
这是使用 dplyr 的解决方案。
library(dplyr)
# Expand the named entries of the list columns `Value` and `Value1` of `dat`
# into ordinary columns.
#
# For each list column, a temporary key numbers the rows that are not the
# "NULL" placeholder (placeholder rows get NA). The non-placeholder cells are
# stacked into a table keyed by the same running number and joined back, so
# placeholder rows end up with NA in the new columns.
dat %>%
  mutate(
    idx = as.character(
      replace(cumsum(Value != "NULL"), Value == "NULL", NA)
    )
  ) %>%
  left_join(
    # `.` is the piped data frame; bind_rows() stores each cell's position
    # in `idx`, matching the running count computed above.
    bind_rows(pull(filter(., Value != "NULL"), Value), .id = "idx"),
    by = "idx"
  ) %>%
  mutate(
    idx2 = as.character(
      replace(cumsum(Value1 != "NULL"), Value1 == "NULL", NA)
    )
  ) %>%
  left_join(
    bind_rows(pull(filter(., Value1 != "NULL"), Value1), .id = "idx2"),
    by = "idx2"
  ) %>%
  # Drop the temporary join keys.
  select(-idx, -idx2)
此处,dat 是数据框的名称。
结果:
U_ID Value AD CT Value1 Citycode Cno#.x cna cf Cno#.y
1 1 50, \n\rjhon\n ia BG 50 TY 50 \n\rjhon\n <NA> 50
2 1 50, guna AS DB \n\r20 UI 50 guna <NA> \n\r20
3 2 30, rt, ty BN FV 30 GH 30 rt ty 30
4 2 NULL VF TY NULL RY <NA> <NA> <NA> <NA>
5 3 \n\r30 RR TT 30 ST \n\r30 <NA> <NA> 30
答案 1 :(得分:0)
**修改:**
已将原答案替换为能够处理多个此类列表列的版本。
这是一种可能的基础R方法:
# Return NA in place of NULL so that a missing entry still yields a value.
na_if_null <- function(x) {
  if (is.null(x)) {
    return(NA)
  }
  x
}

# For every list column of `df`, build one whitespace-trimmed vector per entry
# name that occurs anywhere in that column. Cells lacking the entry give NA.
new_cols <- lapply(
  Filter(is.list, df),
  function(list_col) {
    entry_names <- unique(do.call(c, lapply(list_col, names)))

    # Collect the entry called `entry_name` from every cell of the column.
    extract_entry <- function(entry_name) {
      sapply(list_col, function(cell) {
        trimws(na_if_null(as.list(cell)[[entry_name]]))
      })
    }

    # Naming the vector by itself makes lapply() return a named list.
    lapply(setNames(nm = entry_names), extract_entry)
  }
)

# Append the new columns to the original data frame. Flattening with c()
# prefixes each entry name with its source column (e.g. "Value.cna").
res <- do.call(
  data.frame,
  c(
    list(df, check.names = FALSE, stringsAsFactors = FALSE),
    do.call(c, new_cols)
  )
)
# U_ID Value AD CT Value1 Citycode Value.Cno# Value.cna Value.cf Value1.Cno#
# 1 1 50, \n\rjhon\n ia BG 50 TY 50 jhon <NA> 50
# 2 1 50, guna AS DB \n\r20 UI 50 guna <NA> 20
# 3 2 30, rt, ty BN FV 30 GH 30 rt ty 30
# 4 2 NULL VF TY NULL RY <NA> <NA> <NA> <NA>
# 5 3 \n\r30 RR TT 30 ST 30 <NA> <NA> 30
答案 2 :(得分:0)
我相信这完全符合您的预期输出:
library(dplyr)
# Expand the entries of the list columns `Value` and `Value1` of `df1` into
# ordinary columns and arrange the columns in the requested order.
#
# map() is provided by purrr and unnest()/spread() by tidyr. They are
# namespace-qualified here because only dplyr is attached, so the unqualified
# calls would fail with "could not find function".
#
# Neither join is given `by`, so each one matches on every column the two
# tables share. Rows holding the "NULL" placeholder are filtered out before
# expansion and therefore get no match in the new columns.
df1 %>%
  left_join(
    df1 %>%
      filter(Value != "NULL") %>%
      mutate(
        # Flatten each cell to a named character vector and keep its names
        # alongside, so both can be unnested in parallel.
        Value_ = purrr::map(Value, unlist),
        vnames = purrr::map(Value_, names)
      ) %>%
      tidyr::unnest(Value_, vnames) %>%
      # One column per entry name.
      tidyr::spread(vnames, Value_) %>%
      rename(Cno = `Cno#`)
  ) %>%
  left_join(
    df1 %>%
      filter(Value1 != "NULL") %>%
      # as.numeric() parses the string, which also discards the surrounding
      # whitespace characters present in some cells.
      mutate(Cno1 = purrr::map(Value1, ~ as.numeric(unlist(.x)))) %>%
      select(-Value, -Value1)
  ) %>%
  select(U_ID, Value, Cno, cna, cf, AD, CT, Value1, Cno1, Citycode)
# U_ID Value Cno cna cf AD CT Value1 Cno1 Citycode
# 1 1 50, \n\rjhon\n 50 \n\rjhon\n <NA> ia BG 50 50 TY
# 2 1 50, guna 50 guna <NA> AS DB \n\r20 20 UI
# 3 2 30, rt, ty 30 rt ty BN FV 30 30 GH
# 4 2 NULL <NA> <NA> <NA> VF TY NULL NULL RY
# 5 3 \n\r30 \n\r30 <NA> <NA> RR TT 30 30 ST