根据数据框某一列中嵌套的数据框列表,在数据框中创建新列

时间:2018-10-16 12:57:42

标签: r

这是我的数据框的样子

str(FBInsightsExpanded)

Classes ‘data.table’ and 'data.frame':  4 obs. of  3 variables:
 $ impressions          : chr  "26580" "5741" "7335" "4123" ...
 $ date_start           : chr  "2018-10-14" "2018-10-14" "2018-10-14" "2018-10-14" ...
 $ action_values        :List of 4
  ..$ :'data.frame':    11 obs. of  2 variables:
  .. ..$ action_type: chr  "app_custom_event.fb_mobile_add_to_cart" "app_custom_event.fb_mobile_add_to_wishlist" "app_custom_event.fb_mobile_content_view" "app_custom_event.fb_mobile_initiated_checkout" ...
  .. ..$ value      : chr  "40505.79" "9262.82" "470464" "2319.35" ...
  ..$ :'data.frame':    8 obs. of  2 variables:
  .. ..$ action_type: chr  "app_custom_event.fb_mobile_add_to_cart" "app_custom_event.fb_mobile_add_to_wishlist" "app_custom_event.fb_mobile_content_view" "app_custom_event.fb_mobile_initiated_checkout" ...
  .. ..$ value      : chr  "26309.21" "6970.84" "697029" "196.8" ...
  ..$ :'data.frame':    10 obs. of  2 variables:
  .. ..$ action_type: chr  "app_custom_event.fb_mobile_add_to_cart" "app_custom_event.fb_mobile_add_to_wishlist" "app_custom_event.fb_mobile_content_view" "app_custom_event.fb_mobile_initiated_checkout" ...
  .. ..$ value      : chr  "42180.84" "4852.95" "282354" "5152.95" ...
  ..$ :'data.frame':    8 obs. of  2 variables:
  .. ..$ action_type: chr  "app_custom_event.fb_mobile_add_to_cart" "app_custom_event.fb_mobile_add_to_wishlist" "app_custom_event.fb_mobile_content_view" "app_custom_event.fb_mobile_initiated_checkout" ...
  .. ..$ value      : chr  "253737.97" "44378.59" "575184.59" "5294" ...

我需要的是一个看起来像这样的数据框:

impressions   date_start  fb_mobile_add_to_wishlist fb_mobile_content_view fb_mobile_initiated_checkout

2 个答案:

答案 0 :(得分:1)

假设 “df” 是您的 data.frame,请尝试:

# Build one wide row per observation of `df` and row-bind the results.
# Each element of `df$action_values` is a long data frame with the columns
# `action_type` and `value` (or NULL); it is spread into one column per action.
# NOTE: `%>%` must be attached (e.g. via library(dplyr) or library(magrittr)).
purrr::pmap_df(
  list(df$impressions, df$date_start, df$action_values),
  function(x, y, z) {
    # x: impressions, y: date_start, z: nested action_values entry (may be NULL)
    if (is.null(z)) {
      # No actions for this row: keep the identifying columns only; the
      # row-binding step fills the missing action columns with NA.
      return(
        data.frame(impressions = x, date_start = y, stringsAsFactors = FALSE)
      )
    }
    z %>%
      tidyr::spread("action_type", "value") %>%
      dplyr::mutate(impressions = x, date_start = y) %>%
      dplyr::select(impressions, date_start, dplyr::everything()) %>%
      dplyr::rename_at(
        dplyr::vars(dplyr::matches("^app_custom_event\\.")),
        # A plain function replaces the deprecated dplyr::funs() wrapper.
        function(nm) stringr::str_replace(nm, "^app_custom_event\\.", "")
      )
  }
)

#  impressions date_start fb_mobile_add_to_cart fb_mobile_add_to_wishlist fb_mobile_content_view
#1       26580 2018-10-14              40505.79                   9262.82                   <NA>
#2        5741 2018-10-14              26309.21                   6970.84                 697029
#3        7335 2018-10-14                  <NA>                      <NA>                   <NA>
#4        4123 2018-10-14              42180.84                   4852.95                 282354

数据:

# Sample data: four observations whose `action_values` column is a list of
# long (action_type, value) data frames. The third entry is deliberately NULL
# to represent an observation without any recorded actions.
df <- data.frame(
  impressions = c("26580", "5741", "7335", "4123"),
  date_start = rep("2018-10-14", 4L),
  # I() keeps the list as a single list-column instead of expanding it.
  action_values = I(list(
    data.frame(
      action_type = c(
        "app_custom_event.fb_mobile_add_to_cart",
        "app_custom_event.fb_mobile_add_to_wishlist"
      ),
      value = c("40505.79", "9262.82"),
      stringsAsFactors = FALSE
    ),
    data.frame(
      action_type = c(
        "app_custom_event.fb_mobile_add_to_cart",
        "app_custom_event.fb_mobile_add_to_wishlist",
        "app_custom_event.fb_mobile_content_view"
      ),
      value = c("26309.21", "6970.84", "697029"),
      stringsAsFactors = FALSE
    ),
    NULL,
    data.frame(
      action_type = c(
        "app_custom_event.fb_mobile_add_to_cart",
        "app_custom_event.fb_mobile_add_to_wishlist",
        "app_custom_event.fb_mobile_content_view"
      ),
      value = c("42180.84", "4852.95", "282354"),
      stringsAsFactors = FALSE
    )
  )),
  stringsAsFactors = FALSE
)

答案 1 :(得分:0)

由于Nicholas2提供了数据:

# Sample data: four observations that all report the same three action types.
# Each entry of `action_values` is a long (action_type, value) data frame; only
# the `value` vectors differ, so the frames are generated from those vectors.
xdf <- data.frame(
  impressions = c("26580", "5741", "7335", "4123"),
  date_start = rep("2018-10-14", 4L),
  # I() keeps the list of data frames as a single list-column.
  action_values = I(lapply(
    list(
      c("40505.79", "9262.82", "470464"),
      c("26309.21", "6970.84", "697029"),
      c("253737.97", "44378.59", "575184.59"),
      c("42180.84", "4852.95", "282354")
    ),
    function(vals) {
      data.frame(
        action_type = c(
          "app_custom_event.fb_mobile_add_to_cart",
          "app_custom_event.fb_mobile_add_to_wishlist",
          "app_custom_event.fb_mobile_content_view"
        ),
        value = vals,
        stringsAsFactors = FALSE
      )
    }
  )),
  stringsAsFactors = FALSE
)

下面是一个零依赖的 base R 解决方案:

# Zero-dependency (base R) reshaping of `xdf`: every nested `action_values`
# data frame (long: action_type / value) becomes one wide row, followed by the
# `impressions` and `date_start` columns of that observation.
#
# Unlike a plain rbind of per-row data frames, this also works when the rows
# report different sets of action types or when an entry is NULL/empty: the
# result has the union of all action columns and missing cells are NA.
# local() keeps the helper variables out of the calling environment; the
# resulting data frame is the value of the expression.
local({
  # Anchored and escaped so that only the literal leading prefix is removed.
  prefix_pattern <- "^app_custom_event\\."

  # One named character vector per row of `xdf`
  # (names = cleaned action types, values = the reported values).
  actions <- lapply(
    seq_len(nrow(xdf)), # safe for zero-row input, unlike 1:nrow(xdf)
    function(.i) {
      av <- xdf$action_values[[.i]]
      if (is.null(av) || nrow(av) == 0L) {
        return(character(0))
      }
      nm <- sub(prefix_pattern, "", as.character(av$action_type))
      if (anyDuplicated(nm) > 0L) {
        stop(
          "duplicated action_type in action_values entry ", .i,
          call. = FALSE
        )
      }
      stats::setNames(as.character(av$value), nm)
    }
  )

  # Union of all action types, sorted so the column order is deterministic.
  cols <- sort(unique(as.character(
    unlist(lapply(actions, names), use.names = FALSE)
  )))

  # Pre-filled with NA so that absent action types stay missing.
  wide <- matrix(
    NA_character_,
    nrow = length(actions),
    ncol = length(cols),
    dimnames = list(NULL, cols)
  )
  for (.i in seq_along(actions)) {
    a <- actions[[.i]]
    if (length(a) > 0L) {
      wide[.i, names(a)] <- a
    }
  }

  out <- as.data.frame(wide, stringsAsFactors = FALSE)
  out$impressions <- xdf$impressions # add the other two columns
  out$date_start <- xdf$date_start
  out
})
##   fb_mobile_add_to_cart fb_mobile_add_to_wishlist fb_mobile_content_view impressions date_start
## 1              40505.79                   9262.82                 470464       26580 2018-10-14
## 2              26309.21                   6970.84                 697029        5741 2018-10-14
## 3             253737.97                  44378.59              575184.59        7335 2018-10-14
## 4              42180.84                   4852.95                 282354        4123 2018-10-14

两种方案的性能对比:

library(microbenchmark)

# Benchmark the base R approach against the tidyverse approach on `xdf`.
# NOTE(review): `%>%` and the `autoplot()` generic are used below but are not
# attached in this snippet; presumably magrittr (or dplyr) and ggplot2 are
# loaded elsewhere -- confirm before running.
microbenchmark(
  # Base R: reshape each nested data frame into one wide row, then rbind.
  base = do.call(
    rbind.data.frame,
    lapply(
      1:nrow(xdf), # row by row (assumes xdf has at least one row)
      function(.i) {
        # Extract the nested (action_type, value) data frame of this row.
        x <- xdf$action_values[[.i]]
        # Strip the prefix from the action types; note that the `.` in the
        # pattern is an unescaped regex wildcard and the pattern is unanchored.
        x$action_type <- gsub("app_custom_event.", "", x$action_type)
        # Reshape to a single wide row with one column per action type.
        x <- as.data.frame(t(unstack(x, value ~ action_type, stringsAsFactors = FALSE)), stringsAsFactors=FALSE)
        # Add the two scalar columns of this observation.
        x$impressions <- xdf$impressions[[.i]]
        x$date_start <- xdf$date_start[[.i]]
        rownames(x) <- NULL
        x
      }
    )
  ),
  # Tidyverse: spread each nested data frame, add the scalar columns, move
  # them to the front, and strip the prefix from the action column names.
  # Unlike the variant that checks is.null(z), this one has no NULL branch.
  tidyverse = purrr::pmap_df(
    list(xdf$impressions, xdf$date_start, xdf$action_values),
    function(x,y,z) z %>%
      tidyr::spread("action_type", "value") %>%
      dplyr::mutate(impressions = x, date_start = y) %>%
      dplyr::select(impressions, date_start, dplyr::everything()) %>%
      dplyr::rename_at(
        dplyr::vars(dplyr::matches("^app_custom_event\\.")),
        dplyr::funs(stringr::str_replace(.,"^app_custom_event\\.",""))
      )
  )
  # Print the timing table, then pass the same object on to autoplot().
) %>% { print(.) ; . } %>% autoplot()
## Unit: milliseconds
##       expr       min        lq      mean    median        uq       max neval
##       base  1.641284  1.770162  2.320804  1.836188  2.077139  9.536442   100
##  tidyverse 17.152142 17.908212 21.554416 18.503356 22.255375 46.630603   100

enter image description here

另外:您这是在让我们替您完成工作。我本希望大家先不要作答,直到您证明自己尝试过*某些方法*;但我不能让一个在 Linux 上需要 57 个包依赖并且需要编译的解决方案成为唯一的答案。