将嵌套列表转换为R中的data.frame

时间:2016-03-03 15:58:47

标签: json r dataframe nested-lists

我将嵌套列表转换为data.frame时遇到问题。

首先,我从Data API下载了JSON格式的数据集:

   # Query the Data API: POST the OIB list as a JSON body, authenticating with
   # the API key header.
   request2 <- POST(
     url = "https://xxxx",
     add_headers(
       "x-dataapi-key" = "xxxx",
       "content-type" = "application/json"
     ),
     body = list(oib = oibreq),
     encode = "json"
   )

   # Parse the response body as JSON into nested R lists.
   jsonContent2 <- content(request2, type = "application/json")

   # Serialise the parsed lists back to JSON (keeping NULLs as JSON null) and
   # re-read them so jsonlite builds a flattened data.frame.
   json2 <- fromJSON(
     toJSON(jsonContent2, null = "null"),
     flatten = TRUE
   )

对象json2具有嵌套列表的形式。这是一个数据集:

> sample <- dput(json2)
structure(
  list(
    datumStanja = list(
      "2016-03-02T00:00:00+01:00",
      "2016-03-02T00:00:00+01:00",
      "2016-03-02T00:00:00+01:00",
      "2016-03-02T00:00:00+01:00",
      "2016-03-02T00:00:00+01:00",
      "2016-03-02T00:00:00+01:00",
      "2016-03-02T00:00:00+01:00",
      "2016-03-02T00:00:00+01:00",
      "2016-03-02T00:00:00+01:00",
      "2016-03-02T00:00:00+01:00"
    ),
    oib = list(
      "00045103869",
      "92680516748",
      "18527887472",
      "18527887472",
      "18527887472",
      "18527887472",
      "00045103869",
      "00045103869",
      "18527887472",
      "92680516748"
    ),
    iban = list(
      "HR0424840081101570980",
      "HR8623400091110462926",
      "HR9123400091110714260",
      "HR5124850031100201015",
      "HR4224910051100006698",
      "HR7524810001100101268",
      "HR8225000091101167416",
      "HR3223400091110156505",
      "HR6323400091110193874",
      "HR4223300031100429609"
    ),
    blokada = list(TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
                   FALSE, FALSE),
    vbdi = list(
      "2484008",
      "2340009",
      "2340009",
      "2485003",
      "2491005",
      "2481000",
      "2500009",
      "2340009",
      "2340009",
      "2330003"
    ),
    brojRacuna = list(
      "1101570980",
      "1110462926",
      "1110714260",
      "1100201015",
      "1100006698",
      "1100101268",
      "1101167416",
      "1110156505",
      "1110193874",
      "1100429609"
    ),
    banka = list(
      "RAIFFEISENBANK AUSTRIA d.d.",
      "PRIVREDNA BANKA ZAGREB d.d.",
      "PRIVREDNA BANKA ZAGREB d.d.",
      "CROATIA BANKA d.d.",
      "CREDO BANKA d.d.",
      "KREDITNA BANKA ZAGREB d.d.",
      "HYPO ALPE-ADRIA-BANK d.d.",
      "PRIVREDNA BANKA ZAGREB d.d.",
      "PRIVREDNA BANKA ZAGREB d.d.",
      "SOCIETE GENERALE - SPLITSKA BANKA d.d."
    ),
    datumOtvaranja = list(
      "2003-02-19T00:00:00+01:00",
      "2011-02-08T00:00:00+01:00",
      "2015-03-30T00:00:00+02:00",
      "2002-02-21T00:00:00+01:00",
      "2002-04-16T00:00:00+02:00",
      "2002-06-24T00:00:00+02:00",
      "2004-07-19T00:00:00+02:00",
      "2004-09-08T00:00:00+02:00",
      "2005-09-28T00:00:00+02:00",
      "2009-12-21T00:00:00+01:00"
    ),
    datumZatvaranja = list(
      NULL,
      NULL,
      NULL,
      "2009-11-06T00:00:00+01:00",
      "2009-02-17T00:00:00+01:00",
      "2009-03-18T00:00:00+01:00",
      "2008-08-14T00:00:00+02:00",
      "2009-07-13T00:00:00+02:00",
      "2013-09-18T00:00:00+02:00",
      "2013-07-09T00:00:00+02:00"
    ),
    povijestBlokada = list(
      structure(
        list(
          pocetak = list(
            "2011-08-04T00:00:00+02:00",
            "2011-09-06T00:00:00+02:00",
            "2011-11-25T00:00:00+01:00",
            "2011-12-30T00:00:00+01:00",
            "2012-02-20T00:00:00+01:00",
            "2012-03-23T00:00:00+01:00",
            "2012-05-21T00:00:00+02:00"
          ),
          kraj = list(
            "2011-08-10T00:00:00+02:00",
            "2011-09-13T00:00:00+02:00",
            "2011-12-28T00:00:00+01:00",
            "2012-01-16T00:00:00+01:00",
            "2012-03-16T00:00:00+01:00",
            "2012-05-16T00:00:00+02:00",
            NULL
          ),
          brojDana = list(6L, 7L, 33L, 17L, 25L, 54L,
                          1381L)
        ),
        .Names = c("pocetak", "kraj", "brojDana"),
        class = "data.frame",
        row.names = c(NA, 7L)
      ),
      structure(
        list(
          pocetak = list(
            "2012-05-30T00:00:00+02:00",
            "2012-06-21T00:00:00+02:00",
            "2012-06-29T00:00:00+02:00",
            "2012-09-06T00:00:00+02:00",
            "2014-06-09T00:00:00+02:00"
          ),
          kraj = list(
            "2012-06-05T00:00:00+02:00",
            "2012-06-26T00:00:00+02:00",
            "2012-07-03T00:00:00+02:00",
            "2013-03-06T00:00:00+01:00",
            NULL
          ),
          brojDana = list(6L, 5L, 4L, 181L, 632L)
        ),
        .Names = c("pocetak", "kraj",
                   "brojDana"),
        class = "data.frame",
        row.names = c(NA, 5L)
      ),

      structure(
        list(
          pocetak = list("2015-03-31T00:00:00+02:00"),
          kraj = list("2015-09-30T00:00:00+02:00"),
          brojDana = list(183L)
        ),
        .Names = c("pocetak", "kraj", "brojDana"),
        class = "data.frame",
        row.names = 1L
      ),
      structure(
        list(),
        .Names = character(0),
        row.names = integer(0),
        class = "data.frame"
      ),
      structure(
        list(),
        .Names = character(0),
        row.names = integer(0),
        class = "data.frame"
      ),
      structure(
        list(),
        .Names = character(0),
        row.names = integer(0),
        class = "data.frame"
      ),
      structure(
        list(),
        .Names = character(0),
        row.names = integer(0),
        class = "data.frame"
      ),
      structure(
        list(),
        .Names = character(0),
        row.names = integer(0),
        class = "data.frame"
      ),
      structure(
        list(),
        .Names = character(0),
        row.names = integer(0),
        class = "data.frame"
      ),
      structure(
        list(
          pocetak = list(
            "2012-05-30T00:00:00+02:00",
            "2012-06-21T00:00:00+02:00",
            "2012-06-29T00:00:00+02:00",
            "2012-09-06T00:00:00+02:00"
          ),
          kraj = list(
            "2012-06-05T00:00:00+02:00",
            "2012-06-26T00:00:00+02:00",
            "2012-07-03T00:00:00+02:00",
            "2013-03-06T00:00:00+01:00"
          ),
          brojDana = list(6L,
                          5L, 4L, 181L)
        ),
        .Names = c("pocetak", "kraj", "brojDana"),
        class = "data.frame",
        row.names = c(NA, 4L)
      )
    ),
    isActive = list(TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE,
                    FALSE, FALSE)
  ),
  .Names = c(
    "datumStanja",
    "oib",
    "iban",
    "blokada",
    "vbdi",
    "brojRacuna",
    "banka",
    "datumOtvaranja",
    "datumZatvaranja",
    "povijestBlokada",
    "isActive"
  ),
  class = "data.frame",
  row.names = c(NA,
                10L)
)

以下是第一行(json2[1,])的结构:

str(json2[1,])
'data.frame':   1 obs. of  11 variables:
 $ datumStanja    :List of 1
  ..$ : chr "2016-03-02T00:00:00+01:00"
 $ oib            :List of 1
  ..$ : chr "00045103869"
 $ iban           :List of 1
  ..$ : chr "HR0424840081101570980"
 $ blokada        :List of 1
  ..$ : logi TRUE
 $ vbdi           :List of 1
  ..$ : chr "2484008"
 $ brojRacuna     :List of 1
  ..$ : chr "1101570980"
 $ banka          :List of 1
  ..$ : chr "RAIFFEISENBANK AUSTRIA d.d."
 $ datumOtvaranja :List of 1
  ..$ : chr "2003-02-19T00:00:00+01:00"
 $ datumZatvaranja:List of 1
  ..$ : NULL
 $ povijestBlokada:List of 1
  ..$ :'data.frame':    7 obs. of  3 variables:
  .. ..$ pocetak :List of 7
  .. .. ..$ : chr "2011-08-04T00:00:00+02:00"
  .. .. ..$ : chr "2011-09-06T00:00:00+02:00"
  .. .. ..$ : chr "2011-11-25T00:00:00+01:00"
  .. .. ..$ : chr "2011-12-30T00:00:00+01:00"
  .. .. ..$ : chr "2012-02-20T00:00:00+01:00"
  .. .. ..$ : chr "2012-03-23T00:00:00+01:00"
  .. .. ..$ : chr "2012-05-21T00:00:00+02:00"
  .. ..$ kraj    :List of 7
  .. .. ..$ : chr "2011-08-10T00:00:00+02:00"
  .. .. ..$ : chr "2011-09-13T00:00:00+02:00"
  .. .. ..$ : chr "2011-12-28T00:00:00+01:00"
  .. .. ..$ : chr "2012-01-16T00:00:00+01:00"
  .. .. ..$ : chr "2012-03-16T00:00:00+01:00"
  .. .. ..$ : chr "2012-05-16T00:00:00+02:00"
  .. .. ..$ : NULL
  .. ..$ brojDana:List of 7
  .. .. ..$ : int 6
  .. .. ..$ : int 7
  .. .. ..$ : int 33
  .. .. ..$ : int 17
  .. .. ..$ : int 25
  .. .. ..$ : int 54
  .. .. ..$ : int 1381
 $ isActive       :List of 1
  ..$ : logi TRUE

如您所见,变量“povijestBlokada”的列表中还嵌套着列表。我的目标是将这个嵌套的列表对象转换为data.frame,使每种不同类型的值各占一列。我尝试使用data.tree包,但无法用as.Node函数转换它。你有什么建议吗?

下面附上原始的JSON数据(我是用dput导出的,肯定有更好的方法,但我不知道该怎么做):

structure("[{\"datumStanja\":[\"2016-03-10T00:00:00+01:00\"],\"oib\":[\"00045103869\"],\"iban\":[\"HR0424840081101570980\"],\"blokada\":[true],\"vbdi\":[\"2484008\"],\"brojRacuna\":[\"1101570980\"],\"banka\":[\"RAIFFEISENBANK AUSTRIA d.d.\"],\"datumOtvaranja\":[\"2003-02-19T00:00:00+01:00\"],\"datumZatvaranja\":null,\"povijestBlokada\":[{\"pocetak\":[\"2011-08-04T00:00:00+02:00\"],\"kraj\":[\"2011-08-10T00:00:00+02:00\"],\"brojDana\":[6]},{\"pocetak\":[\"2011-09-06T00:00:00+02:00\"],\"kraj\":[\"2011-09-13T00:00:00+02:00\"],\"brojDana\":[7]},{\"pocetak\":[\"2011-11-25T00:00:00+01:00\"],\"kraj\":[\"2011-12-28T00:00:00+01:00\"],\"brojDana\":[33]},{\"pocetak\":[\"2011-12-30T00:00:00+01:00\"],\"kraj\":[\"2012-01-16T00:00:00+01:00\"],\"brojDana\":[17]},{\"pocetak\":[\"2012-02-20T00:00:00+01:00\"],\"kraj\":[\"2012-03-16T00:00:00+01:00\"],\"brojDana\":[25]},{\"pocetak\":[\"2012-03-23T00:00:00+01:00\"],\"kraj\":[\"2012-05-16T00:00:00+02:00\"],\"brojDana\":[54]},{\"pocetak\":[\"2012-05-21T00:00:00+02:00\"],\"kraj\":null,\"brojDana\":[1389]}],\"isActive\":[true]},{\"datumStanja\":[\"2016-03-10T00:00:00+01:00\"],\"oib\":[\"92680516748\"],\"iban\":[\"HR8623400091110462926\"],\"blokada\":[true],\"vbdi\":[\"2340009\"],\"brojRacuna\":[\"1110462926\"],\"banka\":[\"PRIVREDNA BANKA ZAGREB 
d.d.\"],\"datumOtvaranja\":[\"2011-02-08T00:00:00+01:00\"],\"datumZatvaranja\":null,\"povijestBlokada\":[{\"pocetak\":[\"2012-05-30T00:00:00+02:00\"],\"kraj\":[\"2012-06-05T00:00:00+02:00\"],\"brojDana\":[6]},{\"pocetak\":[\"2012-06-21T00:00:00+02:00\"],\"kraj\":[\"2012-06-26T00:00:00+02:00\"],\"brojDana\":[5]},{\"pocetak\":[\"2012-06-29T00:00:00+02:00\"],\"kraj\":[\"2012-07-03T00:00:00+02:00\"],\"brojDana\":[4]},{\"pocetak\":[\"2012-09-06T00:00:00+02:00\"],\"kraj\":[\"2013-03-06T00:00:00+01:00\"],\"brojDana\":[181]},{\"pocetak\":[\"2014-06-09T00:00:00+02:00\"],\"kraj\":null,\"brojDana\":[640]}],\"isActive\":[true]},{\"datumStanja\":[\"2016-03-10T00:00:00+01:00\"],\"oib\":[\"18527887472\"],\"iban\":[\"HR9123400091110714260\"],\"blokada\":[false],\"vbdi\":[\"2340009\"],\"brojRacuna\":[\"1110714260\"],\"banka\":[\"PRIVREDNA BANKA ZAGREB d.d.\"],\"datumOtvaranja\":[\"2015-03-30T00:00:00+02:00\"],\"datumZatvaranja\":null,\"povijestBlokada\":[{\"pocetak\":[\"2015-03-31T00:00:00+02:00\"],\"kraj\":[\"2015-09-30T00:00:00+02:00\"],\"brojDana\":[183]}],\"isActive\":[true]},{\"datumStanja\":[\"2016-03-10T00:00:00+01:00\"],\"oib\":[\"18527887472\"],\"iban\":[\"HR5124850031100201015\"],\"blokada\":[false],\"vbdi\":[\"2485003\"],\"brojRacuna\":[\"1100201015\"],\"banka\":[\"CROATIA BANKA d.d.\"],\"datumOtvaranja\":[\"2002-02-21T00:00:00+01:00\"],\"datumZatvaranja\":[\"2009-11-06T00:00:00+01:00\"],\"povijestBlokada\":[],\"isActive\":[false]},{\"datumStanja\":[\"2016-03-10T00:00:00+01:00\"],\"oib\":[\"18527887472\"],\"iban\":[\"HR4224910051100006698\"],\"blokada\":[false],\"vbdi\":[\"2491005\"],\"brojRacuna\":[\"1100006698\"],\"banka\":[\"CREDO BANKA 
d.d.\"],\"datumOtvaranja\":[\"2002-04-16T00:00:00+02:00\"],\"datumZatvaranja\":[\"2009-02-17T00:00:00+01:00\"],\"povijestBlokada\":[],\"isActive\":[false]},{\"datumStanja\":[\"2016-03-10T00:00:00+01:00\"],\"oib\":[\"18527887472\"],\"iban\":[\"HR7524810001100101268\"],\"blokada\":[false],\"vbdi\":[\"2481000\"],\"brojRacuna\":[\"1100101268\"],\"banka\":[\"KREDITNA BANKA ZAGREB d.d.\"],\"datumOtvaranja\":[\"2002-06-24T00:00:00+02:00\"],\"datumZatvaranja\":[\"2009-03-18T00:00:00+01:00\"],\"povijestBlokada\":[],\"isActive\":[false]},{\"datumStanja\":[\"2016-03-10T00:00:00+01:00\"],\"oib\":[\"00045103869\"],\"iban\":[\"HR8225000091101167416\"],\"blokada\":[false],\"vbdi\":[\"2500009\"],\"brojRacuna\":[\"1101167416\"],\"banka\":[\"HYPO ALPE-ADRIA-BANK d.d.\"],\"datumOtvaranja\":[\"2004-07-19T00:00:00+02:00\"],\"datumZatvaranja\":[\"2008-08-14T00:00:00+02:00\"],\"povijestBlokada\":[],\"isActive\":[false]},{\"datumStanja\":[\"2016-03-10T00:00:00+01:00\"],\"oib\":[\"00045103869\"],\"iban\":[\"HR3223400091110156505\"],\"blokada\":[false],\"vbdi\":[\"2340009\"],\"brojRacuna\":[\"1110156505\"],\"banka\":[\"PRIVREDNA BANKA ZAGREB d.d.\"],\"datumOtvaranja\":[\"2004-09-08T00:00:00+02:00\"],\"datumZatvaranja\":[\"2009-07-13T00:00:00+02:00\"],\"povijestBlokada\":[],\"isActive\":[false]},{\"datumStanja\":[\"2016-03-10T00:00:00+01:00\"],\"oib\":[\"18527887472\"],\"iban\":[\"HR6323400091110193874\"],\"blokada\":[false],\"vbdi\":[\"2340009\"],\"brojRacuna\":[\"1110193874\"],\"banka\":[\"PRIVREDNA BANKA ZAGREB d.d.\"],\"datumOtvaranja\":[\"2005-09-28T00:00:00+02:00\"],\"datumZatvaranja\":[\"2013-09-18T00:00:00+02:00\"],\"povijestBlokada\":[],\"isActive\":[false]},{\"datumStanja\":[\"2016-03-10T00:00:00+01:00\"],\"oib\":[\"92680516748\"],\"iban\":[\"HR4223300031100429609\"],\"blokada\":[false],\"vbdi\":[\"2330003\"],\"brojRacuna\":[\"1100429609\"],\"banka\":[\"SOCIETE GENERALE - SPLITSKA BANKA 
d.d.\"],\"datumOtvaranja\":[\"2009-12-21T00:00:00+01:00\"],\"datumZatvaranja\":[\"2013-07-09T00:00:00+02:00\"],\"povijestBlokada\":[{\"pocetak\":[\"2012-05-30T00:00:00+02:00\"],\"kraj\":[\"2012-06-05T00:00:00+02:00\"],\"brojDana\":[6]},{\"pocetak\":[\"2012-06-21T00:00:00+02:00\"],\"kraj\":[\"2012-06-26T00:00:00+02:00\"],\"brojDana\":[5]},{\"pocetak\":[\"2012-06-29T00:00:00+02:00\"],\"kraj\":[\"2012-07-03T00:00:00+02:00\"],\"brojDana\":[4]},{\"pocetak\":[\"2012-09-06T00:00:00+02:00\"],\"kraj\":[\"2013-03-06T00:00:00+01:00\"],\"brojDana\":[181]}],\"isActive\":[false]}]", class = "json")

1 个答案:

答案 0 :(得分:0)

我设法使用dplyr库对json2$povijestBlokada进行了展开(unnest;注意unnest()函数实际由tidyr包提供):

  • 使用tbl_df:用as_data_frame将列表转换为tbl_df
  • 选择嵌套元素povijestBlokada并将其展开(unnest)。
  • 问题在于某些列中存在NULL值,但用"NA"替换它们就可以了。如果一开始这些值就是NA,我认为您还可以再进行一次unnest
library(dplyr)
library(tidyr) # unnest() is exported by tidyr, not dplyr

# Collapse a list column into a character vector. Empty entries (NULL, i.e. a
# JSON null) become real missing values rather than the string "NA", so that
# is.na() works on the result.
null_to_na_chr <- function(col) {
  vapply(
    col,
    function(value) {
      if (length(value) == 0L) NA_character_ else as.character(value)
    },
    character(1),
    USE.NAMES = FALSE
  )
}

# Keep only the nested blockade history and stack the per-account data frames
# into one table; accounts with an empty history contribute no rows.
DT <- as_tibble(json2)
DT1 <- DT %>%
  select(povijestBlokada) %>%
  unnest(povijestBlokada)

# Flatten every remaining list column to character and print the result.
DT1 %>% mutate(across(everything(), null_to_na_chr))
#> Source: local data frame [17 x 3]
#> Groups: <by row>
#> 
#>                      pocetak                      kraj brojDana
#>                        (chr)                     (chr)    (chr)
#> 1  2011-08-04T00:00:00+02:00 2011-08-10T00:00:00+02:00        6
#> 2  2011-09-06T00:00:00+02:00 2011-09-13T00:00:00+02:00        7
#> 3  2011-11-25T00:00:00+01:00 2011-12-28T00:00:00+01:00       33
#> 4  2011-12-30T00:00:00+01:00 2012-01-16T00:00:00+01:00       17
#> 5  2012-02-20T00:00:00+01:00 2012-03-16T00:00:00+01:00       25
#> 6  2012-03-23T00:00:00+01:00 2012-05-16T00:00:00+02:00       54
#> 7  2012-05-21T00:00:00+02:00                        NA     1381
#> 8  2012-05-30T00:00:00+02:00 2012-06-05T00:00:00+02:00        6
#> 9  2012-06-21T00:00:00+02:00 2012-06-26T00:00:00+02:00        5
#> 10 2012-06-29T00:00:00+02:00 2012-07-03T00:00:00+02:00        4
#> 11 2012-09-06T00:00:00+02:00 2013-03-06T00:00:00+01:00      181
#> 12 2014-06-09T00:00:00+02:00                        NA      632
#> 13 2015-03-31T00:00:00+02:00 2015-09-30T00:00:00+02:00      183
#> 14 2012-05-30T00:00:00+02:00 2012-06-05T00:00:00+02:00        6
#> 15 2012-06-21T00:00:00+02:00 2012-06-26T00:00:00+02:00        5
#> 16 2012-06-29T00:00:00+02:00 2012-07-03T00:00:00+02:00        4
#> 17 2012-09-06T00:00:00+02:00 2013-03-06T00:00:00+01:00      181