数据框

时间:2018-01-18 23:11:47

标签: r list dplyr purrr

我有dataframe_A,其中有一列,每行包含一个数据帧。

以下是我所说的数据帧的输入:

structure(list(spend = c("840.99", "1145.39", "1043.88", "1169.15", 
"1249.75", "1197.57"), actions = list(structure(list(action_type = c("like", 

"link_click", "post", "post_reaction", "page_engagement", "post_engagement"
), value = c("18", "205", "2", "28", "253", "235")), .Names = 
c("action_type", 
"value"), class = "data.frame", row.names = c(NA, 6L)), structure(list(
action_type = c("like", "link_click", "post", "post_reaction", 
"page_engagement", "post_engagement"), value = c("26", "268", 
"3", "33", "330", "304")), .Names = c("action_type", "value"
), class = "data.frame", row.names = c(NA, 6L)), structure(list(
action_type = c("like", "link_click", "post", "post_reaction", 
"page_engagement", "post_engagement"), value = c("16", "214", 
"2", "28", "260", "244")), .Names = c("action_type", "value"
), class = "data.frame", row.names = c(NA, 6L)), structure(list(
action_type = c("comment", "like", "link_click", "post", 
"post_reaction", "page_engagement", "post_engagement"), value = c("2", 
"14", "255", "2", "44", "317", "303")), .Names = c("action_type", 
"value"), class = "data.frame", row.names = c(NA, 7L)), structure(list(
action_type = c("comment", "like", "link_click", "post", 
"post_reaction", "page_engagement", "post_engagement"), value = c("1", 
"15", "240", "1", "23", "280", "265")), .Names = c("action_type", 
"value"), class = "data.frame", row.names = c(NA, 7L)), structure(list(
action_type = c("like", "link_click", "post", "post_reaction", 
"page_engagement", "post_engagement"), value = c("16", "252", 
"3", "32", "303", "287")), .Names = c("action_type", "value"
), class = "data.frame", row.names = c(NA, 6L))), date_start = c("2017-10-31", 
"2017-11-01", "2017-11-02", "2017-11-03", "2017-11-04", "2017-11-05"
)), .Names = c("spend", "actions", "date_start"), class = c("data.table", 
"data.frame"), row.names = c(NA, -6L), .internal.selfref = <pointer: 
0x0000000000310788>) 

输出如下:

     spend      actions date_start
1:  840.99 <data.frame> 2017-10-31
2: 1145.39 <data.frame> 2017-11-01
3: 1043.88 <data.frame> 2017-11-02
4: 1169.15 <data.frame> 2017-11-03
5: 1249.75 <data.frame> 2017-11-04
6: 1197.57 <data.frame> 2017-11-05

如果我只查看actions列,它看起来像这样:

df$actions
[[1]]
      action_type value
1            like    18
2      link_click   205
3            post     2
4   post_reaction    28
5 page_engagement   253
6 post_engagement   235

[[2]]
      action_type value
1            like    26
2      link_click   268
3            post     3
4   post_reaction    33
5 page_engagement   330
6 post_engagement   304

[[3]]
      action_type value
1            like    16
2      link_click   214
3            post     2
4   post_reaction    28
5 page_engagement   260
6 post_engagement   244

[[4]]
      action_type value
1         comment     2
2            like    14
3      link_click   255
4            post     2
5   post_reaction    44
6 page_engagement   317
7 post_engagement   303

[[5]]
      action_type value
1         comment     1
2            like    15
3      link_click   240
4            post     1
5   post_reaction    23
6 page_engagement   280
7 post_engagement   265

[[6]]
      action_type value
1            like    16
2      link_click   252
3            post     3
4   post_reaction    32
5 page_engagement   303
6 post_engagement   287

您可以看到每个数据框对应于原始数据框中的一行。如何提取这些数据框并使行名称将附加到原始数据框的新列。我希望我的最终结果是这样的(当我这样做时,格式化很奇怪所以我不能写最后3列,但那些也应该填充):

     spend      actions date_start    comment    like   link_click    post    post_reaction    page_engagement     post_engagement   
1:  840.99 <data.frame> 2017-10-31          0      18          205       2 
2: 1145.39 <data.frame> 2017-11-01          0      26          268       3              
3: 1043.88 <data.frame> 2017-11-02          0      16          214       2                  
4: 1169.15 <data.frame> 2017-11-03          2      14          255       2                          
5: 1249.75 <data.frame> 2017-11-04          1      15          240       1                             
6: 1197.57 <data.frame> 2017-11-05          0      16          252       3  

不确定这是否是正确的路线,但我正在考虑这些方面的内容并将结果重新绑定并将其绑定到原始数​​据帧:

t(df$actions[[1]])

任何帮助都会很棒,谢谢!

1 个答案:

答案 0 :(得分:1)

也许这种方法:

library(tidyverse) 

   z1 %>%
      as.tibble() %>%
      unnest() %>%
      spread(action_type, value) #convert to wide format
#output
  spend   date_start comment like  link_click page_engagement post  post_engagement post_reaction
* <chr>   <chr>      <chr>   <chr> <chr>      <chr>           <chr> <chr>           <chr>        
1 1043.88 2017-11-02 NA      16    214        260             2     244             28           
2 1145.39 2017-11-01 NA      26    268        330             3     304             33           
3 1169.15 2017-11-03 2       14    255        317             2     303             44           
4 1197.57 2017-11-05 NA      16    252        303             3     287             32           
5 1249.75 2017-11-04 1       15    240        280             1     265             23           
6 840.99  2017-10-31 NA      18    205        253             2     235             28   

数据:

> dput(z1)
structure(list(spend = c("840.99", "1145.39", "1043.88", "1169.15", 
"1249.75", "1197.57"), actions = list(structure(list(action_type = c("like", 
"link_click", "post", "post_reaction", "page_engagement", "post_engagement"
), value = c("18", "205", "2", "28", "253", "235")), .Names = c("action_type", 
"value"), class = "data.frame", row.names = c(NA, 6L)), structure(list(
    action_type = c("like", "link_click", "post", "post_reaction", 
    "page_engagement", "post_engagement"), value = c("26", "268", 
    "3", "33", "330", "304")), .Names = c("action_type", "value"
), class = "data.frame", row.names = c(NA, 6L)), structure(list(
    action_type = c("like", "link_click", "post", "post_reaction", 
    "page_engagement", "post_engagement"), value = c("16", "214", 
    "2", "28", "260", "244")), .Names = c("action_type", "value"
), class = "data.frame", row.names = c(NA, 6L)), structure(list(
    action_type = c("comment", "like", "link_click", "post", 
    "post_reaction", "page_engagement", "post_engagement"), value = c("2", 
    "14", "255", "2", "44", "317", "303")), .Names = c("action_type", 
"value"), class = "data.frame", row.names = c(NA, 7L)), structure(list(
    action_type = c("comment", "like", "link_click", "post", 
    "post_reaction", "page_engagement", "post_engagement"), value = c("1", 
    "15", "240", "1", "23", "280", "265")), .Names = c("action_type", 
"value"), class = "data.frame", row.names = c(NA, 7L)), structure(list(
    action_type = c("like", "link_click", "post", "post_reaction", 
    "page_engagement", "post_engagement"), value = c("16", "252", 
    "3", "32", "303", "287")), .Names = c("action_type", "value"
), class = "data.frame", row.names = c(NA, 6L))), date_start = c("2017-10-31", 
"2017-11-01", "2017-11-02", "2017-11-03", "2017-11-04", "2017-11-05"
)), .Names = c("spend", "actions", "date_start"), class = c("data.table", 
"data.frame"), row.names = c(NA, -6L))