我想要完成的是将列拆分为同一个表中的多个列。
我的数据:
eventCategory eventAction eVentLabel
HomePage Click {"Name":"Ariel","number":"aaa"}
HomePage Click {"Name":"Dan","number":"bbb"}
HomePage Click {"Name":"Daf","number":"ccc"}
我需要什么:
eventCategory eventAction eVentLabel Name number
HomePage Click {"Name":"Ariel","number":"aaa"} Ariel aaa
HomePage Click {"Name":"Dan","number":"bbb"} Dan bbb
HomePage Click {"Name":"Daf","number":"ccc"} Daf ccc
答案 0 :(得分:5)
Another `tidyverse` answer, this time employing `jsonlite::fromJSON` and `purrr`. This solution transparently handles additional columns embedded in the JSON and fills missing values appropriately.
library(tidyverse)
library(jsonlite)

# Sample input: three tab-separated columns; the last one holds a JSON object.
# The tabs are written as explicit `\t` escapes so the literal still parses as
# TSV even when an editor or copy/paste turns real tab characters into spaces.
data.raw <- 'eventCategory\teventAction\teVentLabel
HomePage\tClick\t{"Name":"Ariel","number":"aaa"}
HomePage\tClick\t{"Name":"Dan","number":"bbb"}
HomePage\tClick\t{"Name":"Daf","number":"ccc"}'

# A string containing newlines is read as literal data, not as a file path.
data <- read_tsv(data.raw)

# Parse each JSON string into a list, convert that list into a one-row tibble,
# then unnest so every JSON key becomes its own column next to the originals.
data %>%
  mutate(
    new_cols = map(eVentLabel, fromJSON),
    new_cols = map(new_cols, as_tibble)
  ) %>%
  unnest(new_cols)
#> # A tibble: 3 x 5
#> eventCategory eventAction eVentLabel Name number
#> <chr> <chr> <chr> <chr> <chr>
#> 1 HomePage Click {"Name":"Ariel","number":"aaa"} Ariel aaa
#> 2 HomePage Click {"Name":"Dan","number":"bbb"} Dan bbb
#> 3 HomePage Click {"Name":"Daf","number":"ccc"} Daf ccc
Please note that `unnest` will drop every row whose JSON yields no values (for example an empty object `{}`). Consider this example:
# Same layout as before, plus one row with an empty JSON object and one row
# whose JSON carries a different key (Account). Tabs are written as explicit
# `\t` escapes so the literal still parses as TSV if real tabs get converted
# to spaces; a space delimiter would not work because the last JSON value
# itself contains a space.
data.raw <- 'eventCategory\teventAction\teVentLabel
HomePage\tClick\t{"Name":"Ariel","number":"aaa"}
HomePage\tClick\t{"Name":"Dan","number":"bbb"}
HomePage\tClick\t{"Name":"Daf","number":"ccc"}
HomePage\tClick\t{}
HomePage\tClick\t{"Account": "010001"}'

# A string containing newlines is read as literal data, not as a file path.
data <- read_tsv(data.raw)

# Parse each JSON string, convert it to a tibble and unnest. The `{}` row
# becomes an empty tibble, which unnest() drops by default (tidyr >= 1.0 offers
# `keep_empty = TRUE` to keep such rows); keys missing from a row become NA.
data %>%
  mutate(
    new_cols = map(eVentLabel, fromJSON),
    new_cols = map(new_cols, as_tibble)
  ) %>%
  unnest(new_cols)
#> # A tibble: 4 x 6
#> eventCategory eventAction eVentLabel Name number Account
#> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 HomePage Click {"Name":"Ariel","number":"aaa"} Ariel aaa <NA>
#> 2 HomePage Click {"Name":"Dan","number":"bbb"} Dan bbb <NA>
#> 3 HomePage Click {"Name":"Daf","number":"ccc"} Daf ccc <NA>
#> 4 HomePage Click {"Account": "010001"} <NA> <NA> 010001
Note that the row with empty JSON (`{}`) in the original data is dropped. A column is also added for the new variable `Account`, and missing values are filled in with `NA` appropriately.
Finally, the pipeline will fail if any row has a blank JSON value (e.g. `""` or `NA`); you need to remove those rows with a `filter` statement before passing the column into `fromJSON`. For example:
# Keep only rows whose JSON cell is present and non-empty, so that fromJSON()
# is never handed NA or "". The trailing `...` is a placeholder for the rest
# of the pipeline.
data %>%
  filter(!is.na(eVentLabel), nchar(eVentLabel) > 0) %>%
  ...
答案 1 :(得分:1)
一种选择是按 `:` 分割字符串,以提取各个元素:
# Replace every JSON delimiter ({ " } ,) with ":" so that a single split on
# ":" isolates the keys and values; df1 has one row, so only the first split
# result is used.
v1 <- local({
  first_row <- strsplit(gsub('[{"},]', ':', df1$eVentLabel), ":")[[1]]
  # Drop the empty pieces produced by consecutive delimiters.
  kept <- trimws(first_row[nzchar(first_row)])
  # Recycled masks: odd positions are keys, even positions are values.
  setNames(kept[c(FALSE, TRUE)], kept[c(TRUE, FALSE)])
})
# Add one column per parsed key, filled with the matching value.
df1[names(v1)] <- v1
df1
# eventCategory eventAction eVentLabel Name number
#1 HomePage Click {"Name":"Ariel","number":"aaa"} Ariel aaa
对于新数据集
# Parse every row's JSON-like string into a named character vector and stack
# the vectors into a character matrix with one row per input row.
# NOTE(review): rbind() takes the column names from the first row only, so this
# assumes every row has the same keys in the same order.
res <- do.call(rbind, lapply(
  strsplit(gsub('[{"},]', ':', df2$eVentLabel), ":"),
  function(x) {
    # Drop the empty pieces produced by consecutive delimiters.
    x1 <- trimws(x[nzchar(x)])
    # Recycled masks: odd positions are keys, even positions are values.
    setNames(x1[c(FALSE, TRUE)], x1[c(TRUE, FALSE)])
  }
))
# `res` is a matrix, so its keys are in colnames(); names() of a matrix is NULL
# and would not address the Name/number columns.
df2[colnames(res)] <- as.data.frame(res, stringsAsFactors = FALSE)
df2
# eventCategory eventAction eVentLabel Name number
#1 HomePage Click {"Name":"Ariel","number":"aaa"} Ariel aaa
#2 HomePage Click {"Name":"Dan","number":"bbb"} Dan bbb
#3 HomePage Click {"Name":"Daf","number":"ccc"} Daf ccc
# Single-row example input used by the first snippet.
df1 <- data.frame(
  eventCategory = "HomePage",
  eventAction = "Click",
  eVentLabel = "{\"Name\":\"Ariel\",\"number\":\"aaa\"}",
  stringsAsFactors = FALSE
)

# Three-row example; note that it already carries the Name and number columns.
df2 <- data.frame(
  eventCategory = c("HomePage", "HomePage", "HomePage"),
  eventAction = c("Click", "Click", "Click"),
  eVentLabel = c(
    "{\"Name\":\"Ariel\",\"number\":\"aaa\"}",
    "{\"Name\":\"Dan\",\"number\":\"bbb\"}",
    "{\"Name\":\"Daf\",\"number\":\"ccc\"}"
  ),
  Name = c("Ariel", "Dan", "Daf"),
  number = c("aaa", "bbb", "ccc"),
  stringsAsFactors = FALSE
)
答案 2 :(得分:0)
一种 `tidyverse` 方法:
library(tidyverse)
library(stringr)
# Example input: three rows whose eventLabel column holds a JSON object string.
df <- data.frame(
  eventCategory = c("HomePage", "HomePage", "HomePage"),
  eventAction = c("Click", "Click", "Click"),
  eventLabel = c(
    "{\"Name\":\"Ariel\",\"number\":\"aaa\"}",
    "{\"Name\":\"Dan\",\"number\":\"bbb\"}",
    "{\"Name\":\"Daf\",\"number\":\"ccc\"}"
  ),
  stringsAsFactors = FALSE
)
eventCategory eventAction eventLabel
1 HomePage Click {"Name":"Ariel","number":"aaa"}
2 HomePage Click {"Name":"Dan","number":"bbb"}
3 HomePage Click {"Name":"Daf","number":"ccc"}
# Names of the two columns that the JSON-like label is split into.
vars <- c("name", "number")

df %>%
  # Split the label at the comma: one "key":"value" piece per target column.
  separate(eventLabel, into = vars, sep = ",") %>%
  # Split each piece at ":" so both columns become list-columns of c(key, value).
  map_at(vars, ~str_split(., ":")) %>%
  # map_at() returns a plain list; turn it back into a tibble.
  as_tibble() %>%
  # Expand the list-columns: each input row yields a key row and a value row.
  unnest(cols = all_of(vars)) %>%
  # Strip the braces and quotes left over from the JSON syntax. This removes
  # all punctuation, including any that is part of a value.
  map_at(vars, ~str_replace_all(., "[[:punct:]]", "")) %>%
  as_tibble() %>%
  # Discard the key rows ("Name" / "number"), keeping only the value rows.
  filter(name != "Name")
eventCategory eventAction name number
<chr> <chr> <chr> <chr>
1 HomePage Click Ariel aaa
2 HomePage Click Dan bbb
3 HomePage Click Daf ccc