鉴于代表某种层次结构的data_frame
,我想将此数据转换为具有特定结构的嵌套JSON。
鉴于此data_frame
df <- data_frame(
"parent" = c("A", "A", "A", "A", "A", "B", "B", "B", "B", "C", "C", "C", "C", "C", "C"),
"child1" = c("a", "a", "b", "b", "c", "d", "d", "e", "e", "f", "f", "f", "f", "g", "g"),
"child2" = c("aa", "ab", "ba", "bb", "ca", "da", "db", "ea", "eb", "fa", "fb", "fc", "fd", "ga", "gb"),
"value" = sample(seq(1,100,1), 15)
)
我想产生以下JSON结构:
[
{
"name": "A",
"children": [
{
"name": "a",
"children": [
{"name": "aa", "value": 89},
{"name": "ab", "value": 20}
]
},
{
"name": "b",
"children": [
{"name": "ba", "value": 25},
{"name": "bb", "value": 15}
]
},
{
"name": "c",
"children": [
{"name": "ca","value": 95}
]
}
]
},
{
"name": "B",
"children": [
{
"name": "d",
"children": [
{"name": "da", "value": 54},
{"name": "db", "value": 62}
]
},
{
"name": "e",
"children": [
{"name": "ea", "value": 100},
{"name": "eb", "value": 56}
]
}
]
},
{
"name": "C",
"children": [
{
"name": "f",
"children": [
{"name": "fa", "value": 69},
{"name": "fb", "value": 98},
{"name": "fc", "value": 83},
{"name": "fd", "value": 63}
]
},
{
"name": "g",
"children": [
{"name": "ga", "value": 91},
{"name": "gb", "value": 77}
]
}
]
}
]
目前,我使用嵌套循环构建嵌套列表,如下所示:
lll <- list()
i <- 1
for (a in unique(df$parent)) {
lll[[i]] <- list(
"name" = a,
"children" = list()
)
ii <- 1
for (b in unique(df$child1[df$parent == a])) {
lll[[i]]$children[[ii]] <- list(
"name" = b,
"children" = list()
)
iii <- 1
for(c in unique(df$child2[df$parent == a & df$child1 == b])) {
lll[[i]]$children[[ii]]$children[[iii]] <- list(
"name" = c,
"value" = df$value[df$parent == a & df$child1 == b & df$child2 == c ]
)
iii <- iii + 1
}
ii <- ii + 1
}
i <- i + 1
}
使用jsonlite::toJSON(lll, pretty = TRUE, auto_unbox = TRUE)
可以将此列表转换为嵌套的JSON。
我想知道是否有更优雅的方式。我试图用purrr
解决这个问题,但我没有成功。
答案 0 :(得分:2)
您可以使用dplyr::group_by()
和tidyr::nest()
的组合来实现此目的:
library(dplyr)
library(tidyr)
df %>%
rename(name = child2) %>%
group_by(parent, child1) %>%
nest(.key = "children") %>%
rename(name = child1) %>%
group_by(parent) %>%
nest(.key = "children") %>%
rename(name = parent) %>%
jsonlite::toJSON(pretty = TRUE, auto_unbox = TRUE)
#> [
#> {
#> "name": "A",
#> "children": [
#> {
#> "name": "a",
#> "children": [
#> {
#> "name": "aa",
#> "value": 64
#> },
#> {
#> "name": "ab",
#> "value": 29
#> }
#> ]
#> },
#> {
#> "name": "b",
#> "children": [
#> {
#> "name": "ba",
#> "value": 73
#> },
#> {
#> "name": "bb",
#> "value": 45
#> }
#> ]
#> },
#> {
#> "name": "c",
#> "children": [
#> {
#> "name": "ca",
#> "value": 95
#> }
#> ]
#> }
#> ]
#> },
#> {
#> "name": "B",
#> "children": [
#> {
#> "name": "d",
#> "children": [
#> {
#> "name": "da",
#> "value": 26
#> },
#> ...
为了重现您的列名,通过调用dplyr::rename
,代码变得更加笨拙。没有它们,操作的结构就会变得更加明显:
df %>%
group_by(parent, child1) %>%
nest() %>%
group_by(parent) %>%
nest() %>%
jsonlite::toJSON(pretty = TRUE, auto_unbox = TRUE)
答案 1 :(得分:0)
我不知道这是否是一个更优雅的解决方案,但它功能齐全,而且效率可能更高。
for循环没有任何问题,除了更新变量时出现副作用的问题,但使用列表从键映射到值最多会给你一个二次运行时间进行更新。我使用我matchbox
package的红黑搜索树解决了这个问题。我还使用pmatch
package.
bind
语法
## Pattern matching + bind[...] syntax
#devtools::install_github("mailund/pmatch")
library(pmatch)
## linked list and rbt-map data structures
#devtools::install_github("mailund/matchbox")
library(matchbox)
首先,我将数据框转换为红黑图的红黑图。这里的数据框架结构是硬连线的,但是对代码进行概括不是一个很大的问题。
## Building a hierarchy of maps from the data frame
# returns the value for a key in a red-black tree unless the
# key is not in the tree, in which case it returns a new
# empty tree
match_or_empty <- function(tree, key) {
if (rbt_map_member(tree, key))
rbt_map_get(tree, key)
else
empty_red_black_map()
}
get_row_nodes <- function(df, row, nodes) {
bind[parent, child1, child2, value] <- df[row,]
parent_node <- match_or_empty(nodes, parent)
child1_node <- match_or_empty(parent_node, child1)
list(parent_node, child1_node)
}
build_hierarchy_row <- function(df, row, nodes) {
bind[parent, child1, child2, value] <- df[row,]
bind[parent_node, child_node] <- get_row_nodes(df, row, nodes)
child_node <- rbt_map_insert(child_node, child2, value)
parent_node <- rbt_map_insert(parent_node, child1, child_node)
rbt_map_insert(nodes, parent, parent_node)
}
build_hierarchy <- function(df) {
nodes <- empty_red_black_map()
for (i in seq_along(df$parent)) {
nodes <- build_hierarchy_row(df, i, nodes)
}
nodes
}
接下来,我将此结构转换为json字符串。
## Translating the hierarchy of rbt-maps into son
library(magrittr)
library(glue)
# this should probably be in matchbox but it isn't yet.
rbt_map_to_llist <- function(tree, f, acc = NIL) {
if (is_red_black_map_empty(tree)) acc
else {
left_result <- rbt_map_to_llist(tree$left, f, acc)
right_result <- rbt_map_to_llist(tree$right, f, left_result)
CONS(list(key=tree$key, val=tree$val), right_result)
}
}
llist_to_json <- function(lst) {
paste0("[", paste0(lst, collapse = ", "), "]")
}
to_json <- function(node) {
bind[key, val] <- node
if (inherits(val, "rbt_map")) {
children <- val %>% rbt_map_to_llist %>% llmap(to_json) %>% llist_to_json
glue::glue('{{ "name": "{key}", "children" = {children} }}')
} else {
glue::glue('{{ "name": "{key}", "value": {val} }')
}
}
现在,我们可以将这两个步骤与一些基于管道的无点编程结合起来:
df_to_json <- . %>%
build_hierarchy %>%
rbt_map_to_llist %>%
llmap(to_json) %>%
llist_to_json %>%
cat
测试它:
library(tibble)
df <- tibble(
parent = c("A", "A", "A", "A", "A", "B", "B", "B", "B", "C", "C", "C", "C", "C", "C"),
child1 = c("a", "a", "b", "b", "c", "d", "d", "e", "e", "f", "f", "f", "f", "g", "g"),
child2 = c("aa", "ab", "ba", "bb", "ca", "da", "db", "ea", "eb", "fa", "fb", "fc", "fd", "ga", "gb"),
value = sample(seq(1,100,1), 15)
)
> df_to_json(df)
[{ "name": "B", "children" = [{ "name": "d", "children" = [{ "name": "da", "value": 12 }, { "name": "db", "value": 88 }] }, { "name": "e", "children" = [{ "name": "ea", "value": 17 }, { "name": "eb", "value": 94 }] }] }, { "name": "C", "children" = [{ "name": "f", "children" = [{ "name": "fb", "value": 46 }, { "name": "fc", "value": 1 }, { "name": "fd", "value": 100 }, { "name": "fa", "value": 86 }] }, { "name": "g", "children" = [{ "name": "ga", "value": 97 }, { "name": "gb", "value": 19 }] }] }, { "name": "A", "children" = [{ "name": "b", "children" = [{ "name": "ba", "value": 54 }, { "name": "bb", "value": 64 }] }, { "name": "c", "children" = [{ "name": "ca", "value": 22 }] }, { "name": "a", "children" = [{ "name": "aa", "value": 63 }, { "name": "ab", "value": 76 }] }] }]