避免将data_frame的嵌套循环嵌套到嵌套的JSON中

时间:2018-03-19 08:29:23

标签: json r purrr

鉴于代表某种层次结构的data_frame,我想将此数据转换为具有特定结构的嵌套JSON。

鉴于此data_frame

df <- data_frame(
    "parent" = c("A", "A", "A", "A", "A", "B", "B", "B", "B", "C", "C", "C", "C", "C", "C"),
    "child1" = c("a", "a", "b", "b", "c", "d", "d", "e", "e", "f", "f", "f", "f", "g", "g"),
    "child2" = c("aa", "ab", "ba", "bb", "ca", "da", "db", "ea", "eb", "fa", "fb", "fc", "fd", "ga", "gb"),
    "value" = sample(seq(1,100,1), 15)
)

我想产生以下JSON结构:

[
  {
    "name": "A",
    "children": [
      {
        "name": "a",
        "children": [
          {"name": "aa", "value": 89},
          {"name": "ab", "value": 20}
        ]
      },
      {
        "name": "b",
        "children": [
          {"name": "ba", "value": 25},
          {"name": "bb", "value": 15}
        ]
      },
      {
        "name": "c",
        "children": [
          {"name": "ca","value": 95}
        ]
      }
    ]
  },
  {
    "name": "B",
    "children": [
      {
        "name": "d",
        "children": [
          {"name": "da", "value": 54},
          {"name": "db", "value": 62}
        ]
      },
      {
        "name": "e",
        "children": [
          {"name": "ea", "value": 100},
          {"name": "eb", "value": 56}
        ]
      }
    ]
  },
  {
    "name": "C",
    "children": [
      {
        "name": "f",
        "children": [
          {"name": "fa", "value": 69},
          {"name": "fb", "value": 98},
          {"name": "fc", "value": 83},
          {"name": "fd", "value": 63}
        ]
      },
      {
        "name": "g",
        "children": [
          {"name": "ga", "value": 91},
          {"name": "gb", "value": 77}
        ]
      }
    ]
  }
] 

目前,我使用嵌套循环构建嵌套列表,如下所示:

lll <- list()
i <- 1

for (a in unique(df$parent)) {

  lll[[i]] <- list(
    "name" = a,
    "children" = list()
  )

  ii <- 1

  for (b in unique(df$child1[df$parent == a])) {
    lll[[i]]$children[[ii]] <- list(
      "name" = b,
      "children" = list()
    )

    iii <- 1

    for(c in unique(df$child2[df$parent == a & df$child1 == b])) {

      lll[[i]]$children[[ii]]$children[[iii]] <- list(
        "name" = c,
        "value" = df$value[df$parent == a & df$child1 == b & df$child2 == c ]
      )

      iii <- iii + 1

    }

    ii <- ii + 1
  }

  i <- i + 1

}

使用jsonlite::toJSON(lll, pretty = TRUE, auto_unbox = TRUE)可以将此列表转换为嵌套的JSON。

我想知道是否有更优雅的方式。我试图用purrr解决这个问题,但我没有成功。

2 个答案:

答案 0 :(得分:2)

您可以使用dplyr::group_by()tidyr::nest()的组合来实现此目的:

library(dplyr)
library(tidyr)

df %>% 
  rename(name = child2) %>% 
  group_by(parent, child1) %>% 
  nest(.key = "children") %>% 
  rename(name = child1) %>% 
  group_by(parent) %>% 
  nest(.key = "children") %>% 
  rename(name = parent) %>% 
  jsonlite::toJSON(pretty = TRUE, auto_unbox = TRUE)
#> [
#>   {
#>     "name": "A",
#>     "children": [
#>       {
#>         "name": "a",
#>         "children": [
#>           {
#>             "name": "aa",
#>             "value": 64
#>           },
#>           {
#>             "name": "ab",
#>             "value": 29
#>           }
#>         ]
#>       },
#>       {
#>         "name": "b",
#>         "children": [
#>           {
#>             "name": "ba",
#>             "value": 73
#>           },
#>           {
#>             "name": "bb",
#>             "value": 45
#>           }
#>         ]
#>       },
#>       {
#>         "name": "c",
#>         "children": [
#>           {
#>             "name": "ca",
#>             "value": 95
#>           }
#>         ]
#>       }
#>     ]
#>   },
#>   {
#>     "name": "B",
#>     "children": [
#>       {
#>         "name": "d",
#>         "children": [
#>           {
#>             "name": "da",
#>             "value": 26
#>           },
#> ...

为了重现您的列名,通过调用dplyr::rename,代码变得更加笨拙。没有它们,操作的结构就会变得更加明显:

df %>% 
  group_by(parent, child1) %>% 
  nest() %>% 
  group_by(parent) %>% 
  nest() %>% 
  jsonlite::toJSON(pretty = TRUE, auto_unbox = TRUE)

答案 1 :(得分:0)

我不知道这是否是一个更优雅的解决方案,但它功能齐全,而且效率可能更高。

for循环没有任何问题,除了更新变量时出现副作用的问题,但使用列表从键映射到值最多会给你一个二次运行时间进行更新。我使用我matchbox package的红黑搜索树解决了这个问题。我还使用pmatch package.

中的bind语法
## Pattern matching + bind[...] syntax
#devtools::install_github("mailund/pmatch")
library(pmatch)

## linked list and rbt-map data structures
#devtools::install_github("mailund/matchbox")
library(matchbox)

首先,我将数据框转换为红黑图的红黑图。这里的数据框架结构是硬连线的,但是对代码进行概括不是一个很大的问题。

## Building a hierarchy of maps from the data frame

# returns the value for a key in a red-black tree unless the
# key is not in the tree, in which case it returns a new
# empty tree
match_or_empty <- function(tree, key) {
    if (rbt_map_member(tree, key))
        rbt_map_get(tree, key)
    else
        empty_red_black_map()
}
get_row_nodes <- function(df, row, nodes) {
    bind[parent, child1, child2, value] <- df[row,]
    parent_node <- match_or_empty(nodes, parent)
    child1_node <- match_or_empty(parent_node, child1)
    list(parent_node, child1_node)
}

build_hierarchy_row <- function(df, row, nodes) {
    bind[parent, child1, child2, value] <- df[row,]
    bind[parent_node, child_node] <- get_row_nodes(df, row, nodes)
    child_node <- rbt_map_insert(child_node, child2, value)
    parent_node <- rbt_map_insert(parent_node, child1, child_node)
    rbt_map_insert(nodes, parent, parent_node)
}
build_hierarchy <- function(df) {
    nodes <- empty_red_black_map()
    for (i in seq_along(df$parent)) {
        nodes <- build_hierarchy_row(df, i, nodes)
    }
    nodes
}

接下来,我将此结构转换为json字符串。

## Translating the hierarchy of rbt-maps into son
library(magrittr)
library(glue)

# this should probably be in matchbox but it isn't yet.
rbt_map_to_llist <- function(tree, f, acc = NIL) {
    if (is_red_black_map_empty(tree)) acc
    else {
        left_result <- rbt_map_to_llist(tree$left, f, acc)
        right_result <- rbt_map_to_llist(tree$right, f, left_result)
        CONS(list(key=tree$key, val=tree$val), right_result)
    }
}

llist_to_json <- function(lst) {
    paste0("[", paste0(lst, collapse = ", "), "]")
}
to_json <- function(node) {
    bind[key, val] <- node
    if (inherits(val, "rbt_map")) {
        children <- val %>% rbt_map_to_llist %>% llmap(to_json) %>% llist_to_json
        glue::glue('{{ "name": "{key}", "children" = {children} }}')

    } else {
        glue::glue('{{ "name": "{key}", "value": {val} }')
    }
}

现在,我们可以将这两个步骤与一些基于管道的无点编程结合起来:

df_to_json <- . %>%
    build_hierarchy %>%
    rbt_map_to_llist %>%
    llmap(to_json) %>% 
    llist_to_json %>%
    cat

测试它:

library(tibble)
df <- tibble(
    parent = c("A", "A", "A", "A", "A", "B", "B", "B", "B", "C", "C", "C", "C", "C", "C"),
    child1 = c("a", "a", "b", "b", "c", "d", "d", "e", "e", "f", "f", "f", "f", "g", "g"),
    child2 = c("aa", "ab", "ba", "bb", "ca", "da", "db", "ea", "eb", "fa", "fb", "fc", "fd", "ga", "gb"),
    value  = sample(seq(1,100,1), 15)
)

> df_to_json(df)
[{ "name": "B", "children" = [{ "name": "d", "children" = [{ "name": "da", "value": 12 }, { "name": "db", "value": 88 }] }, { "name": "e", "children" = [{ "name": "ea", "value": 17 }, { "name": "eb", "value": 94 }] }] }, { "name": "C", "children" = [{ "name": "f", "children" = [{ "name": "fb", "value": 46 }, { "name": "fc", "value": 1 }, { "name": "fd", "value": 100 }, { "name": "fa", "value": 86 }] }, { "name": "g", "children" = [{ "name": "ga", "value": 97 }, { "name": "gb", "value": 19 }] }] }, { "name": "A", "children" = [{ "name": "b", "children" = [{ "name": "ba", "value": 54 }, { "name": "bb", "value": 64 }] }, { "name": "c", "children" = [{ "name": "ca", "value": 22 }] }, { "name": "a", "children" = [{ "name": "aa", "value": 63 }, { "name": "ab", "value": 76 }] }] }]