R-如何基于多个变量列表(列)动态添加列到数据帧中

时间:2017-09-05 08:27:20

标签: r dataframe

如果数据框中有多个列的取值是列表,我需要根据这些列表中的元素动态地添加新列。(如果某一列包含列表,则为其添加相应的新列;否则保持该列不变)

我的数据框,

U_ID  Value                                 AD   CT value1              Citycode
    1 list(`Cno`="50",`cna`="\n\rjhon\n")   ia   BG list(`Cno`="50")       TY
    1 list(`Cno`="20",`cna`="guna")         AS   DB list(`Cno`="\n\r20")   UI
    2 list(`Cno`="30",`cna`="rt",`cf`="ty") BN   FV list(`Cno`="30")       GH
    2 NULL                                  VF   TY NULL                   TY
    3 list(`Cno`="\n\r30")                  RR   TT list(`Cno`="30")       ST

我期望的输出是:

U_ID  Value                                  Cno   cna   cf      AD   CT   value1                Cno1           Citycode
1     list(`Cno`="50",`cna`="\n\rjhon\n")    50    jhon  NULL    ia   BG   list(`Cno1`="50")       50              TY
1     list(`Cno`="20",`cna`="guna")          20   guna  NULL     AS   DB   list(`Cno1`="\n\r20")   20              UI
2     list(`Cno`="30",`cna`="rt",`cf`="ty")  30    rt    ty      BN   FV   list(`Cno1`="30")       30              GH
2     NULL                                   NULL  NULL  NULL    VF   TY   NULL                    NULL            TY
3     list(`Cno`="\n\r30")                   30    NULL  NULL     RR   TT  list(`Cno1`="30")       30              ST

数据,

# dput() representation of the example data frame (5 rows).
# `Value` and `Value1` are list columns: each entry is either a named list of
# strings (element names "Cno#", "cna", "cf") or the literal string "NULL",
# which serves as the placeholder for "no data" in that row.
structure(list(U_ID = c(1, 1, 2, 2, 3), Value = list(structure(list(
    `Cno#` = "50", cna = "\n\rjhon\n"), .Names = c("Cno#", "cna"
)), structure(list(`Cno#` = "50", cna = "guna"), .Names = c("Cno#", 
"cna")), structure(list(`Cno#` = "30", cna = "rt", cf = "ty"), .Names = c("Cno#", 
"cna", "cf")), "NULL", structure(list(`Cno#` = "\n\r30"), .Names = "Cno#")), 
    AD = c("ia", "AS", "BN", "VF", "RR"), CT = c("BG", "DB", 
    "FV", "TY", "TT"), Value1 = list(structure(list(`Cno#` = "50"), .Names = "Cno#"), 
        structure(list(`Cno#` = "\n\r20"), .Names = "Cno#"), 
        structure(list(`Cno#` = "30"), .Names = "Cno#"), "NULL", 
        structure(list(`Cno#` = "30"), .Names = "Cno#")), Citycode = c("TY", 
    "UI", "GH", "RY", "ST")), .Names = c("U_ID", "Value", "AD", 
"CT", "Value1", "Citycode"), row.names = c(NA, -5L), class = "data.frame")

3 个答案:

答案 0 :(得分:1)

这是dplyr的解决方案。

library(dplyr)

# Expand the list columns `Value` and `Value1` of `dat` into ordinary columns,
# one per element name, leaving NA on rows that hold the "NULL" placeholder.
dat %>%
  # Running count of rows whose `Value` is not the "NULL" placeholder; the
  # replacement function `is.na<-` then blanks the count to NA on placeholder
  # rows. Stored as character, presumably so it matches the type of the `.id`
  # column produced by bind_rows() below.
  mutate(idx = as.character(`is.na<-`(cumsum(Value != "NULL"),
                                      Value == "NULL"))) %>%
  # `.` is the data frame piped in so far: pull its non-placeholder `Value`
  # entries, stack them into one data frame (`idx` identifying the source
  # entry), and join the resulting columns back onto the rows by `idx`.
  left_join(filter(., Value != "NULL") %>%
              pull(Value) %>%
              bind_rows(.id = "idx"),
            by = "idx") %>%
  # Same procedure for `Value1`, using a second helper index `idx2`.
  mutate(idx2 = as.character(`is.na<-`(cumsum(Value1 != "NULL"),
                                      Value1 == "NULL"))) %>%
  left_join(filter(., Value1 != "NULL") %>%
              pull(Value1) %>%
              bind_rows(.id = "idx2"),
            by = "idx2") %>%  
  # Drop the two helper index columns.
  select(-idx, -idx2)

此处,dat是数据框的名称。

结果:

  U_ID          Value AD CT Value1 Citycode Cno#.x        cna   cf Cno#.y
1    1 50, \n\rjhon\n ia BG     50       TY     50 \n\rjhon\n <NA>     50
2    1       50, guna AS DB \n\r20       UI     50       guna <NA> \n\r20
3    2     30, rt, ty BN FV     30       GH     30         rt   ty     30
4    2           NULL VF TY   NULL       RY   <NA>       <NA> <NA>   <NA>
5    3         \n\r30 RR TT     30       ST \n\r30       <NA> <NA>     30

答案 1 :(得分:0)

修改
我已将原答案替换为一个能够处理多个此类列表列的版本。

这是一种可能的基础R方法:

# Return `x` unchanged unless it is NULL, in which case return NA so that an
# absent value still occupies one slot.
na_if_null <- function(x) {
  if (!is.null(x)) {
    return(x)
  }
  NA
}

# For every list column of `df`, build one character vector per distinct
# element name used in that column. The result is a list keyed by list-column
# name, each holding a list keyed by element name, each holding one value per
# row (NA where the row has no such element).
new_cols <- lapply(
  Filter(is.list, df),
  function(list_col) {
    # Union of the element names appearing in any row of this column;
    # setNames(nm = ...) makes the lapply() below return a list named by them.
    names_ <- setNames(nm = unique(do.call(c, lapply(list_col, names))))
    lapply(names_, function(name) {
      # vapply() guarantees a character vector with exactly one value per row.
      # sapply() would silently return a list or matrix if an entry held
      # several values, which only fails later inside data.frame().
      vapply(
        list_col,
        # as.list() lets plain-string entries (the "NULL" placeholder) be
        # indexed by name as well: the lookup yields NULL, which becomes NA.
        function(x) trimws(na_if_null(as.list(x)[[name]])),
        character(1)
      )
    })
  }
)

# Append the extracted vectors to the original columns. do.call(c, new_cols)
# flattens the two-level list and prefixes every name with its source column
# (e.g. "Value.cna"); check.names = FALSE keeps names such as "Value.Cno#"
# from being mangled into syntactic names.
res <- do.call(
  data.frame,
  c(
    list(df, check.names = FALSE, stringsAsFactors = FALSE),
    do.call(c, new_cols)
  )
)

#   U_ID          Value AD CT Value1 Citycode Value.Cno# Value.cna Value.cf Value1.Cno#
# 1    1 50, \n\rjhon\n ia BG     50       TY         50      jhon     <NA>          50
# 2    1       50, guna AS DB \n\r20       UI         50      guna     <NA>          20
# 3    2     30, rt, ty BN FV     30       GH         30        rt       ty          30
# 4    2           NULL VF TY   NULL       RY       <NA>      <NA>     <NA>        <NA>
# 5    3         \n\r30 RR TT     30       ST         30      <NA>     <NA>          30

答案 2 :(得分:0)

我相信这完全符合您的预期输出:

library(dplyr)
library(purrr)  # map()
library(tidyr)  # unnest(), spread()

# Rebuild `df1` with the elements of `Value` spread into the columns
# `Cno`, `cna`, `cf`, and the content of `Value1` converted into `Cno1`.
# Both joins omit `by`, so they match on every column the two sides share.
df1 %>%
  # Right-hand side 1: for rows whose `Value` is not the "NULL" placeholder,
  # flatten each entry to a named character vector, keep the names alongside,
  # go long (one row per element) and then wide (one column per element name).
  left_join(df1 %>%
              filter(Value != "NULL") %>%
              mutate(Value_ = map(Value,unlist), vnames = map(Value_,names)) %>%
              unnest(Value_,vnames) %>%
              spread(vnames,Value_) %>%
              rename(Cno = `Cno#`)) %>%
  # Right-hand side 2: numeric version of `Value1` for non-placeholder rows.
  # map() returns a list, so `Cno1` is a list column and unmatched rows show
  # NULL rather than NA.
  left_join(df1 %>%
              filter(Value1 != "NULL") %>%
              mutate(Cno1 = map(Value1,~as.numeric(unlist(.x)))) %>%
              select(-Value,-Value1)) %>%
  # Put the columns in the order requested in the question.
  select(U_ID,Value,Cno,cna,cf,AD,CT,Value1,Cno1,Citycode)

#   U_ID          Value    Cno        cna   cf AD CT Value1 Cno1 Citycode
# 1    1 50, \n\rjhon\n     50 \n\rjhon\n <NA> ia BG     50   50       TY
# 2    1       50, guna     50       guna <NA> AS DB \n\r20   20       UI
# 3    2     30, rt, ty     30         rt   ty BN FV     30   30       GH
# 4    2           NULL   <NA>       <NA> <NA> VF TY   NULL NULL       RY
# 5    3         \n\r30 \n\r30       <NA> <NA> RR TT     30   30       ST