如何动态地从R中的现有列值创建新列

时间:2017-11-13 08:13:38

标签: r

我有一个名为df的数据框,如何根据其中现有的列表列(list column)创建新列?

我的数据框。

Policy             Item

Checked           list(Processed = "Valid", Gmail = "yy@gmail", Information = list(list(Descrption = "T1, R1", VID = "YUY")))

Sample            list(Processed = "Valid", Gmail = "tt@gmail", Information = list(list(Descrption = "D3, Y3", VID = "RT")))

Processed         list(Processed = "Valid", Gmail = "pp@gmail", Information = list(list(Descrption = "Y2, LE", VID = "UIU")))

我期望得到的数据框。

Policy          Processed    Gmail        Descrption  VID

Checked           Valid      yy@gmail       "T1,R1"  "YUY"

Sample            Valid      tt@gmail       "D3,Y3"  "RT"

Processed         Valid      pp@gmail       "Y2,LE"  "UIU"

我使用下面的代码来获取我预期的数据框。

# Return NA in place of NULL; any non-NULL value is passed through unchanged.
na_if_null <- function(x) {
  if (!is.null(x)) {
    return(x)
  }
  NA
}

# For every list-type column of df, build one vector per element name found
# in that column (one entry per row, NA where a row lacks that element).
new_cols <- lapply(
  Filter(is.list, df),
  function(list_col) {
    # All distinct element names used by any cell of this column;
    # setNames(nm = ) names the vector by its own values so the outer
    # lapply() result is named too.
    cell_names <- lapply(list_col, names)
    names_ <- setNames(nm = unique(do.call(c, cell_names)))

    # Extract one named element from a single cell, trimmed of whitespace.
    pick <- function(cell, name) {
      trimws(na_if_null(as.list(cell)[[name]]))
    }

    lapply(names_, function(name) {
      sapply(list_col, function(cell) pick(cell, name))
    })
  }
)

# Bind the extracted vectors next to the original columns of df.
res <- local({
  # Flatten the per-column results into a single named list; c() joins the
  # outer and inner names with a dot (e.g. "Item.Gmail").
  flat_cols <- do.call(c, new_cols)
  frame_args <- list(df, check.names = FALSE, stringsAsFactors = FALSE)
  do.call(data.frame, c(frame_args, flat_cols))
})

但是我得到的却是下面这个数据框。请帮我解决这个问题。

Policy       Item                                                                                                          Item.Processed    Item.Gmail     Item.Information

Checked      list(Processed = "Valid", Gmail = "yy@gmail", Information = list(list(Descrption = "T1, R1", VID = "YUY")))    Processed        yy@gmail      list(Descrption = "T1, R1", VID = "YUY")

Sample       list(Processed = "Valid", Gmail = "tt@gmail", Information = list(list(Descrption = "D3, Y3", VID = "RT")))     Processed        tt@gmail      list(Descrption = "D3, Y3", VID = "RT")  

Processed    list(Processed = "Valid", Gmail = "pp@gmail", Information = list(list(Descrption = "Y2, LE", VID = "UIU")))    Processed        pp@gmail      list(Descrption = "Y2, LE", VID = "UIU")

dput

    # Presumably the dput() output of df: Policy is a character vector and
    # Item is a list of three one-row data frames. Each of those holds
    # Processed, Gmail and a list column Information, whose single element is
    # a one-row data frame with Descrption and VID.
    structure(list(Policy = c("Checked", "Sample", "Processed"), Item = list(
    structure(list(Processed = "Valid", Gmail = "yy@gmail", Information = list(
        structure(list(Descrption = "T1, R1", VID = "YUY"), .Names = c("Descrption", 
        "VID"), class = "data.frame", row.names = 1L))), .Names = c("Processed", 
    "Gmail", "Information"), class = "data.frame", row.names = 1L), 
    structure(list(Processed = "Valid", Gmail = "tt@gmail", Information = list(
        structure(list(Descrption = "D3, Y3", VID = "RT"), .Names = c("Descrption", 
        "VID"), class = "data.frame", row.names = 1L))), .Names = c("Processed", 
    "Gmail", "Information"), class = "data.frame", row.names = 1L), 
    structure(list(Processed = "Valid", Gmail = "pp@gmail", Information = list(
        structure(list(Descrption = "Y2, LE", VID = "UIU"), .Names = c("Descrption", 
        "VID"), class = "data.frame", row.names = 1L))), .Names = c("Processed", 
    "Gmail", "Information"), class = "data.frame", row.names = 1L))), row.names = c(NA, 
3L), class = "data.frame", .Names = c("Policy", "Item"))

示例数据框

Policy             colval                                 Item     

Checked         list(PID="4",Bdetail ="ui,89")      list(Processed = "Valid", Gmail = "yy@gmail", Information = list(list(Descrption = "T1, R1", VID = "YUY")))

Sample          list(PID="7",Bdetail ="ju,78")      list(Processed = "Valid", Gmail = "tt@gmail", Information = list(list(Descrption = "D3, Y3", VID = "RT")))

Processed       list(PID ="8",Bdetail ="nj,45")     list(Processed = "Valid", Gmail = "pp@gmail", Information = list(list(Descrption = "Y2, LE", VID = "UIU")))

2 个答案:

答案 0 :(得分:0)

这里是基础R的解决方案:

# Flatten each row's nested Item into a named character vector, stack those
# vectors row-wise into a matrix, and put Policy in front as first column.
dd <- local({
  flatten_row <- function(i) unlist(dx$Item[i])
  item_matrix <- do.call(rbind, lapply(seq_len(nrow(dx)), flatten_row))
  cbind(dx$Policy, item_matrix)
})

# Label the columns explicitly.
colnames(dd) <- c("Policy", "Processed", "Gmail", "Descrption", "VID")

dd

#       Policy      Processed Gmail      Descrption VID  
# [1,] "Checked"   "Valid"   "yy@gmail" "T1, R1"   "YUY"
# [2,] "Sample"    "Valid"   "tt@gmail" "D3, Y3"   "RT" 
# [3,] "Processed" "Valid"   "pp@gmail" "Y2, LE"   "UIU"

基本上,我对每个项目使用unlist,然后用经典的do.call(rbind, list)把它们按行合并。

修改

如果您想要使用与原始子列表相同的名称,您可以执行以下操作:

# Keep only the part of each name after its last dot, so a prefixed name
# such as "Information.VID" becomes "VID"; the first column is "Policy".
colnames(dd) <- c("Policy", gsub(".*[.]", "", colnames(dd)[-1]))

data.table解决方案

library(data.table)

# Turn dx into a data.table in place (no reassignment needed).
setDT(dx)

# Per Policy group: flatten every column of .SD into a one-row data.table,
# then stack the pieces with rbindlist().
dx[
  ,
  rbindlist(lapply(.SD, function(col) data.table(t(unlist(col))))),
  by = Policy
]

答案 1 :(得分:0)

使用tidyr中的unnest可以轻松完成:

library(dplyr)
library(tidyr)

# Unnest one nesting level per call: first the Item list column, then the
# Information list column that it exposes. The columns are named explicitly
# because tidyr >= 1.0 warns when unnest() is called without a column;
# passing them positionally also works in older tidyr versions.
df %>%
  unnest(Item) %>%
  unnest(Information)

结果:

     Policy Processed    Gmail Descrption VID
1   Checked     Valid yy@gmail     T1, R1 YUY
2    Sample     Valid tt@gmail     D3, Y3  RT
3 Processed     Valid pp@gmail     Y2, LE UIU

数据:

# Example data: one row per Policy. Item is a list column whose elements are
# one-row data frames, each carrying a nested Information list column that
# holds a one-row data frame with Descrption and VID.
df <- structure(
  list(
    Policy = c("Checked", "Sample", "Processed"),
    Item = list(
      structure(
        list(
          Processed = "Valid",
          Gmail = "yy@gmail",
          Information = list(
            structure(
              list(Descrption = "T1, R1", VID = "YUY"),
              .Names = c("Descrption", "VID"),
              class = "data.frame",
              row.names = 1L
            )
          )
        ),
        .Names = c("Processed", "Gmail", "Information"),
        class = "data.frame",
        row.names = 1L
      ),
      structure(
        list(
          Processed = "Valid",
          Gmail = "tt@gmail",
          Information = list(
            structure(
              list(Descrption = "D3, Y3", VID = "RT"),
              .Names = c("Descrption", "VID"),
              class = "data.frame",
              row.names = 1L
            )
          )
        ),
        .Names = c("Processed", "Gmail", "Information"),
        class = "data.frame",
        row.names = 1L
      ),
      structure(
        list(
          Processed = "Valid",
          Gmail = "pp@gmail",
          Information = list(
            structure(
              list(Descrption = "Y2, LE", VID = "UIU"),
              .Names = c("Descrption", "VID"),
              class = "data.frame",
              row.names = 1L
            )
          )
        ),
        .Names = c("Processed", "Gmail", "Information"),
        class = "data.frame",
        row.names = 1L
      )
    )
  ),
  row.names = c(NA, 3L),
  class = "data.frame",
  .Names = c("Policy", "Item")
)

注意:

注意我使用了两次unnest,因为原始数据框中有两层嵌套的列表。unnest会自动展平数据框中的所有列表列并沿用其中的名称,但它不会递归执行,因此列表嵌套了多少层,就需要调用多少次unnest。