使用bind_cols(r,dplyr)时设置列名

时间:2017-06-28 21:42:33

标签: r dplyr

我有一个data.frame(df),其中包含另一个名为url_variables的data.frame。

# Pull the nested url_variables data frame out of df.
url_variables <- df$url_variables

url_variables包含许多其他data.frame,例如source、campaign、page等等。每个数据框都有3列:key、value和type。我正在使用bind_cols将所有单独的数据框组合成一个扁平的数据框。

# Treat the nested data frame as a list of its component data frames and
# bind them side by side into a single flat data frame.
flat_url_variables <- bind_cols(as.list(url_variables))

问题是flat_url_variables的列名是缺乏描述性的key、value、type等。如何根据每个url变量data.frame的名称来命名flat_url_variables的列(例如source_key、source_value、source_type、campaign_key等)?

编辑:以下是url_variables一小部分的截图示例:[图片]

以下是dput(head(url_variables))的输出:

structure(list(`_privatedomain` = structure(list(key = c("_privatedomain", 
"_privatedomain", "_privatedomain", "_privatedomain", "_privatedomain", 
"_privatedomain"), value = c("t", "t", "t", "t", "t", "t"), type = c("url", 
"url", "url", "url", "url", "url")), .Names = c("key", "value", 
"type"), row.names = c(NA, 6L), class = "data.frame"), p = structure(list(
    key = c("p", NA, NA, "p", "p", "p"), value = c("2", NA, NA, 
    "2", "2", "2"), type = c("url", NA, NA, "url", "url", "url"
    )), .Names = c("key", "value", "type"), row.names = c(NA, 
6L), class = "data.frame"), s = structure(list(key = c("s", NA, 
NA, "s", "s", "s"), value = c("incomplete", NA, NA, "incomplete", 
"incomplete", "incomplete"), type = c("url", NA, NA, "url", "url", 
"url")), .Names = c("key", "value", "type"), row.names = c(NA, 
6L), class = "data.frame"), first_name = structure(list(key = c("first_name", 
NA, NA, "first_name", "first_name", "first_name"), value = c("Allan", 
NA, NA, "james", "Sheryl", "Yara"), type = c("url", NA, NA, 
"url", "url", "url")), .Names = c("key", "value", "type"), row.names = c(NA, 
6L), class = "data.frame"), last_name = structure(list(key = c("last_name", 
NA, NA, "last_name", "last_name", "last_name"), value = c("Smith", 
NA, NA, "jones", "Smith", "Keating"), type = c("url", NA, 
NA, "url", "url", "url")), .Names = c("key", "value", "type"), row.names = c(NA, 
6L), class = "data.frame"), email = structure(list(key = c("email", 
NA, NA, "email", "email", "email"), value = c("Allan@email.com", 
NA, NA, "james@email.com", "sheryl@email", "Yara@email.com"
), type = c("url", NA, NA, "url", "url", "url")), .Names = c("key", 
"value", "type"), row.names = c(NA, 6L), class = "data.frame"), 
    phone_number = structure(list(key = c("phone_number", NA, 
    NA, "phone_number", "phone_number", "phone_number"), value = c("0401234567", 
    NA, NA, "0401234567", "0401234567", "0401234567"), type = c("url", 
    NA, NA, "url", "url", "url")), .Names = c("key", "value", 
    "type"), row.names = c(NA, 6L), class = "data.frame"), from = structure(list(
        key = c("from", NA, NA, "from", "from", "from"), value = c("landing_page", 
        NA, NA, "landing_page", "landing_page", "landing_page"
        ), type = c("url", NA, NA, "url", "url", "url")), .Names = c("key", 
    "value", "type"), row.names = c(NA, 6L), class = "data.frame"), 
    snc = structure(list(key = c(NA, NA, "snc", NA, NA, NA), 
        value = c(NA, NA, "1495606827_5925262b571d70.64387871", 
        NA, NA, NA), type = c(NA, NA, "url", NA, NA, NA)), .Names = c("key", 
    "value", "type"), row.names = c(NA, 6L), class = "data.frame"), 
    `__sgtarget` = structure(list(key = c(NA, NA, "__sgtarget", 
    NA, NA, NA), value = c(NA, NA, "10", NA, NA, NA), type = c(NA, 
    NA, "url", NA, NA, NA)), .Names = c("key", "value", "type"
    ), row.names = c(NA, 6L), class = "data.frame"), customertime = structure(list(
        key = c(NA_character_, NA_character_, NA_character_, 
        NA_character_, NA_character_, NA_character_), value = c(NA_character_, 
        NA_character_, NA_character_, NA_character_, NA_character_, 
        NA_character_), type = c(NA_character_, NA_character_, 
        NA_character_, NA_character_, NA_character_, NA_character_
        )), .Names = c("key", "value", "type"), row.names = c(NA, 
    6L), class = "data.frame"), sotime = structure(list(key = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_), value = c(NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_), type = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_)), .Names = c("key", "value", "type"), row.names = c(NA, 
    6L), class = "data.frame"), cancelreschedulelink = structure(list(
        key = c(NA_character_, NA_character_, NA_character_, 
        NA_character_, NA_character_, NA_character_), value = c(NA_character_, 
        NA_character_, NA_character_, NA_character_, NA_character_, 
        NA_character_), type = c(NA_character_, NA_character_, 
        NA_character_, NA_character_, NA_character_, NA_character_
        )), .Names = c("key", "value", "type"), row.names = c(NA, 
    6L), class = "data.frame"), params = structure(list(key = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_), value = c(NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_), type = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_)), .Names = c("key", "value", "type"), row.names = c(NA, 
    6L), class = "data.frame"), icslink = structure(list(key = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_), value = c(NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_), type = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_)), .Names = c("key", "value", "type"), row.names = c(NA, 
    6L), class = "data.frame"), type = structure(list(key = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_), value = c(NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_), type = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_)), .Names = c("key", "value", "type"), row.names = c(NA, 
    6L), class = "data.frame")), .Names = c("_privatedomain", 
"p", "s", "first_name", "last_name", "email", "phone_number", 
"from", "snc", "__sgtarget", "customertime", "sotime", "cancelreschedulelink", 
"params", "icslink", "type"), row.names = c(NA, 6L), class = "data.frame")

1 个答案:

答案 0 :(得分:2)

最简单的方法可能就是自己设置名称。在这里,我使用lapply遍历url_variables的每一列,直接设置名称,然后绑定结果:

# Prefix every column of each nested data frame with the name of the
# url_variables column it came from (e.g. "p" -> p_key, p_value, p_type),
# then bind the renamed data frames side by side.
flat <- bind_cols(
  lapply(names(url_variables), function(var_name) {
    nested <- url_variables[[var_name]]
    setNames(nested, paste0(var_name, "_", names(nested)))
  })
)

我们可以看到flat[ , 1:6]给出的部分结果:

  _privatedomain_key _privatedomain_value _privatedomain_type p_key p_value p_type
1     _privatedomain                    t                 url     p       2    url
2     _privatedomain                    t                 url  <NA>    <NA>   <NA>
3     _privatedomain                    t                 url  <NA>    <NA>   <NA>
4     _privatedomain                    t                 url     p       2    url
5     _privatedomain                    t                 url     p       2    url
6     _privatedomain                    t                 url     p       2    url